Coverage Report

Created: 2025-10-09 06:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/utils/adt/datum.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * datum.c
4
 *    POSTGRES Datum (abstract data type) manipulation routines.
5
 *
6
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7
 * Portions Copyright (c) 1994, Regents of the University of California
8
 *
9
 *
10
 * IDENTIFICATION
11
 *    src/backend/utils/adt/datum.c
12
 *
13
 *-------------------------------------------------------------------------
14
 */
15
16
/*
17
 * In the implementation of these routines we assume the following:
18
 *
19
 * A) if a type is "byVal" then all the information is stored in the
20
 * Datum itself (i.e. no pointers involved!). In this case the
21
 * length of the type is always greater than zero and not more than
22
 * "sizeof(Datum)"
23
 *
24
 * B) if a type is not "byVal" and it has a fixed length (typlen > 0),
25
 * then the "Datum" always contains a pointer to a stream of bytes.
26
 * The number of significant bytes is always equal to the typlen.
27
 *
28
 * C) if a type is not "byVal" and has typlen == -1,
29
 * then the "Datum" always points to a "struct varlena".
30
 * This varlena structure has information about the actual length of this
31
 * particular instance of the type and about its value.
32
 *
33
 * D) if a type is not "byVal" and has typlen == -2,
34
 * then the "Datum" always points to a null-terminated C string.
35
 *
36
 * Note that we do not treat "toasted" datums specially; therefore what
37
 * will be copied or compared is the compressed data or toast reference.
38
 * An exception is made for datumCopy() of an expanded object, however,
39
 * because most callers expect to get a simple contiguous (and pfree'able)
40
 * result from datumCopy().  See also datumTransfer().
41
 */
42
43
#include "postgres.h"
44
45
#include "access/detoast.h"
46
#include "common/hashfn.h"
47
#include "fmgr.h"
48
#include "utils/datum.h"
49
#include "utils/expandeddatum.h"
50
#include "utils/fmgrprotos.h"
51
52
53
/*-------------------------------------------------------------------------
54
 * datumGetSize
55
 *
56
 * Find the "real" size of a datum, given the datum value,
57
 * whether it is a "by value", and the declared type length.
58
 * (For TOAST pointer datums, this is the size of the pointer datum.)
59
 *
60
 * This is essentially an out-of-line version of the att_addlength_datum()
61
 * macro in access/tupmacs.h.  We do a tad more error checking though.
62
 *-------------------------------------------------------------------------
63
 */
64
Size
65
datumGetSize(Datum value, bool typByVal, int typLen)
66
0
{
67
0
  Size    size;
68
69
0
  if (typByVal)
70
0
  {
71
    /* Pass-by-value types are always fixed-length */
72
0
    Assert(typLen > 0 && typLen <= sizeof(Datum));
73
0
    size = (Size) typLen;
74
0
  }
75
0
  else
76
0
  {
77
0
    if (typLen > 0)
78
0
    {
79
      /* Fixed-length pass-by-ref type */
80
0
      size = (Size) typLen;
81
0
    }
82
0
    else if (typLen == -1)
83
0
    {
84
      /* It is a varlena datatype */
85
0
      struct varlena *s = (struct varlena *) DatumGetPointer(value);
86
87
0
      if (!s)
88
0
        ereport(ERROR,
89
0
            (errcode(ERRCODE_DATA_EXCEPTION),
90
0
             errmsg("invalid Datum pointer")));
91
92
0
      size = (Size) VARSIZE_ANY(s);
93
0
    }
94
0
    else if (typLen == -2)
95
0
    {
96
      /* It is a cstring datatype */
97
0
      char     *s = (char *) DatumGetPointer(value);
98
99
0
      if (!s)
100
0
        ereport(ERROR,
101
0
            (errcode(ERRCODE_DATA_EXCEPTION),
102
0
             errmsg("invalid Datum pointer")));
103
104
0
      size = (Size) (strlen(s) + 1);
105
0
    }
106
0
    else
107
0
    {
108
0
      elog(ERROR, "invalid typLen: %d", typLen);
109
0
      size = 0;     /* keep compiler quiet */
110
0
    }
111
0
  }
112
113
0
  return size;
114
0
}
115
116
/*-------------------------------------------------------------------------
117
 * datumCopy
118
 *
119
 * Make a copy of a non-NULL datum.
120
 *
121
 * If the datatype is pass-by-reference, memory is obtained with palloc().
122
 *
123
 * If the value is a reference to an expanded object, we flatten into memory
124
 * obtained with palloc().  We need to copy because one of the main uses of
125
 * this function is to copy a datum out of a transient memory context that's
126
 * about to be destroyed, and the expanded object is probably in a child
127
 * context that will also go away.  Moreover, many callers assume that the
128
 * result is a single pfree-able chunk.
129
 *-------------------------------------------------------------------------
130
 */
131
Datum
132
datumCopy(Datum value, bool typByVal, int typLen)
133
0
{
134
0
  Datum   res;
135
136
0
  if (typByVal)
137
0
    res = value;
138
0
  else if (typLen == -1)
139
0
  {
140
    /* It is a varlena datatype */
141
0
    struct varlena *vl = (struct varlena *) DatumGetPointer(value);
142
143
0
    if (VARATT_IS_EXTERNAL_EXPANDED(vl))
144
0
    {
145
      /* Flatten into the caller's memory context */
146
0
      ExpandedObjectHeader *eoh = DatumGetEOHP(value);
147
0
      Size    resultsize;
148
0
      char     *resultptr;
149
150
0
      resultsize = EOH_get_flat_size(eoh);
151
0
      resultptr = (char *) palloc(resultsize);
152
0
      EOH_flatten_into(eoh, resultptr, resultsize);
153
0
      res = PointerGetDatum(resultptr);
154
0
    }
155
0
    else
156
0
    {
157
      /* Otherwise, just copy the varlena datum verbatim */
158
0
      Size    realSize;
159
0
      char     *resultptr;
160
161
0
      realSize = (Size) VARSIZE_ANY(vl);
162
0
      resultptr = (char *) palloc(realSize);
163
0
      memcpy(resultptr, vl, realSize);
164
0
      res = PointerGetDatum(resultptr);
165
0
    }
166
0
  }
167
0
  else
168
0
  {
169
    /* Pass by reference, but not varlena, so not toasted */
170
0
    Size    realSize;
171
0
    char     *resultptr;
172
173
0
    realSize = datumGetSize(value, typByVal, typLen);
174
175
0
    resultptr = (char *) palloc(realSize);
176
0
    memcpy(resultptr, DatumGetPointer(value), realSize);
177
0
    res = PointerGetDatum(resultptr);
178
0
  }
179
0
  return res;
180
0
}
181
182
/*-------------------------------------------------------------------------
183
 * datumTransfer
184
 *
185
 * Transfer a non-NULL datum into the current memory context.
186
 *
187
 * This is equivalent to datumCopy() except when the datum is a read-write
188
 * pointer to an expanded object.  In that case we merely reparent the object
189
 * into the current context, and return its standard R/W pointer (in case the
190
 * given one is a transient pointer of shorter lifespan).
191
 *-------------------------------------------------------------------------
192
 */
193
Datum
194
datumTransfer(Datum value, bool typByVal, int typLen)
195
0
{
196
0
  if (!typByVal && typLen == -1 &&
197
0
    VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value)))
198
0
    value = TransferExpandedObject(value, CurrentMemoryContext);
199
0
  else
200
0
    value = datumCopy(value, typByVal, typLen);
201
0
  return value;
202
0
}
203
204
/*-------------------------------------------------------------------------
205
 * datumIsEqual
206
 *
207
 * Return true if two datums are equal, false otherwise
208
 *
209
 * NOTE: XXX!
210
 * We just compare the bytes of the two values, one by one.
211
 * This routine will return false if there are 2 different
212
 * representations of the same value (something along the lines
213
 * of say the representation of zero in one's complement arithmetic).
214
 * Also, it will probably not give the answer you want if either
215
 * datum has been "toasted".
216
 *
217
 * Do not try to make this any smarter than it currently is with respect
218
 * to "toasted" datums, because some of the callers could be working in the
219
 * context of an aborted transaction.
220
 *-------------------------------------------------------------------------
221
 */
222
bool
223
datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
224
0
{
225
0
  bool    res;
226
227
0
  if (typByVal)
228
0
  {
229
    /*
230
     * just compare the two datums. NOTE: just comparing "len" bytes will
231
     * not do the work, because we do not know how these bytes are aligned
232
     * inside the "Datum".  We assume instead that any given datatype is
233
     * consistent about how it fills extraneous bits in the Datum.
234
     */
235
0
    res = (value1 == value2);
236
0
  }
237
0
  else
238
0
  {
239
0
    Size    size1,
240
0
          size2;
241
0
    char     *s1,
242
0
           *s2;
243
244
    /*
245
     * Compare the bytes pointed by the pointers stored in the datums.
246
     */
247
0
    size1 = datumGetSize(value1, typByVal, typLen);
248
0
    size2 = datumGetSize(value2, typByVal, typLen);
249
0
    if (size1 != size2)
250
0
      return false;
251
0
    s1 = (char *) DatumGetPointer(value1);
252
0
    s2 = (char *) DatumGetPointer(value2);
253
0
    res = (memcmp(s1, s2, size1) == 0);
254
0
  }
255
0
  return res;
256
0
}
257
258
/*-------------------------------------------------------------------------
259
 * datum_image_eq
260
 *
261
 * Compares two datums for identical contents, based on byte images.  Return
262
 * true if the two datums are equal, false otherwise.
263
 *-------------------------------------------------------------------------
264
 */
265
bool
266
datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
267
0
{
268
0
  Size    len1,
269
0
        len2;
270
0
  bool    result = true;
271
272
0
  if (typByVal)
273
0
  {
274
0
    result = (value1 == value2);
275
0
  }
276
0
  else if (typLen > 0)
277
0
  {
278
0
    result = (memcmp(DatumGetPointer(value1),
279
0
             DatumGetPointer(value2),
280
0
             typLen) == 0);
281
0
  }
282
0
  else if (typLen == -1)
283
0
  {
284
0
    len1 = toast_raw_datum_size(value1);
285
0
    len2 = toast_raw_datum_size(value2);
286
    /* No need to de-toast if lengths don't match. */
287
0
    if (len1 != len2)
288
0
      result = false;
289
0
    else
290
0
    {
291
0
      struct varlena *arg1val;
292
0
      struct varlena *arg2val;
293
294
0
      arg1val = PG_DETOAST_DATUM_PACKED(value1);
295
0
      arg2val = PG_DETOAST_DATUM_PACKED(value2);
296
297
0
      result = (memcmp(VARDATA_ANY(arg1val),
298
0
               VARDATA_ANY(arg2val),
299
0
               len1 - VARHDRSZ) == 0);
300
301
      /* Only free memory if it's a copy made here. */
302
0
      if ((Pointer) arg1val != DatumGetPointer(value1))
303
0
        pfree(arg1val);
304
0
      if ((Pointer) arg2val != DatumGetPointer(value2))
305
0
        pfree(arg2val);
306
0
    }
307
0
  }
308
0
  else if (typLen == -2)
309
0
  {
310
0
    char     *s1,
311
0
           *s2;
312
313
    /* Compare cstring datums */
314
0
    s1 = DatumGetCString(value1);
315
0
    s2 = DatumGetCString(value2);
316
0
    len1 = strlen(s1) + 1;
317
0
    len2 = strlen(s2) + 1;
318
0
    if (len1 != len2)
319
0
      return false;
320
0
    result = (memcmp(s1, s2, len1) == 0);
321
0
  }
322
0
  else
323
0
    elog(ERROR, "unexpected typLen: %d", typLen);
324
325
0
  return result;
326
0
}
327
328
/*-------------------------------------------------------------------------
329
 * datum_image_hash
330
 *
331
 * Generate a hash value based on the binary representation of 'value'.  Most
332
 * use cases will want to use the hash function specific to the Datum's type,
333
 * however, some corner cases require generating a hash value based on the
334
 * actual bits rather than the logical value.
335
 *-------------------------------------------------------------------------
336
 */
337
uint32
338
datum_image_hash(Datum value, bool typByVal, int typLen)
339
0
{
340
0
  Size    len;
341
0
  uint32    result;
342
343
0
  if (typByVal)
344
0
    result = hash_bytes((unsigned char *) &value, sizeof(Datum));
345
0
  else if (typLen > 0)
346
0
    result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
347
0
  else if (typLen == -1)
348
0
  {
349
0
    struct varlena *val;
350
351
0
    len = toast_raw_datum_size(value);
352
353
0
    val = PG_DETOAST_DATUM_PACKED(value);
354
355
0
    result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ);
356
357
    /* Only free memory if it's a copy made here. */
358
0
    if ((Pointer) val != DatumGetPointer(value))
359
0
      pfree(val);
360
0
  }
361
0
  else if (typLen == -2)
362
0
  {
363
0
    char     *s;
364
365
0
    s = DatumGetCString(value);
366
0
    len = strlen(s) + 1;
367
368
0
    result = hash_bytes((unsigned char *) s, len);
369
0
  }
370
0
  else
371
0
  {
372
0
    elog(ERROR, "unexpected typLen: %d", typLen);
373
0
    result = 0;       /* keep compiler quiet */
374
0
  }
375
376
0
  return result;
377
0
}
378
379
/*-------------------------------------------------------------------------
380
 * btequalimage
381
 *
382
 * Generic "equalimage" support function.
383
 *
384
 * B-Tree operator classes whose equality function could safely be replaced by
385
 * datum_image_eq() in all cases can use this as their "equalimage" support
386
 * function.
387
 *
388
 * Currently, we unconditionally assume that any B-Tree operator class that
389
 * registers btequalimage as its support function 4 must be able to safely use
390
 * optimizations like deduplication (i.e. we return true unconditionally).  If
391
 * it ever proved necessary to rescind support for an operator class, we could
392
 * do that in a targeted fashion by doing something with the opcintype
393
 * argument.
394
 *-------------------------------------------------------------------------
395
 */
396
Datum
397
btequalimage(PG_FUNCTION_ARGS)
398
0
{
399
  /* Oid    opcintype = PG_GETARG_OID(0); */
400
401
0
  PG_RETURN_BOOL(true);
402
0
}
403
404
/*-------------------------------------------------------------------------
405
 * datumEstimateSpace
406
 *
407
 * Compute the amount of space that datumSerialize will require for a
408
 * particular Datum.
409
 *-------------------------------------------------------------------------
410
 */
411
Size
412
datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
413
0
{
414
0
  Size    sz = sizeof(int);
415
416
0
  if (!isnull)
417
0
  {
418
    /* no need to use add_size, can't overflow */
419
0
    if (typByVal)
420
0
      sz += sizeof(Datum);
421
0
    else if (typLen == -1 &&
422
0
         VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
423
0
    {
424
      /* Expanded objects need to be flattened, see comment below */
425
0
      sz += EOH_get_flat_size(DatumGetEOHP(value));
426
0
    }
427
0
    else
428
0
      sz += datumGetSize(value, typByVal, typLen);
429
0
  }
430
431
0
  return sz;
432
0
}
433
434
/*-------------------------------------------------------------------------
435
 * datumSerialize
436
 *
437
 * Serialize a possibly-NULL datum into caller-provided storage.
438
 *
439
 * Note: "expanded" objects are flattened so as to produce a self-contained
440
 * representation, but other sorts of toast pointers are transferred as-is.
441
 * This is because the intended use of this function is to pass the value
442
 * to another process within the same database server.  The other process
443
 * could not access an "expanded" object within this process's memory, but
444
 * we assume it can dereference the same TOAST pointers this one can.
445
 *
446
 * The format is as follows: first, we write a 4-byte header word, which
447
 * is either the length of a pass-by-reference datum, -1 for a
448
 * pass-by-value datum, or -2 for a NULL.  If the value is NULL, nothing
449
 * further is written.  If it is pass-by-value, sizeof(Datum) bytes
450
 * follow.  Otherwise, the number of bytes indicated by the header word
451
 * follow.  The caller is responsible for ensuring that there is enough
452
 * storage to store the number of bytes that will be written; use
453
 * datumEstimateSpace() to find out how many will be needed.
454
 * *start_address is updated to point to the byte immediately following
455
 * those written.
456
 *-------------------------------------------------------------------------
457
 */
458
void
459
datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
460
         char **start_address)
461
0
{
462
0
  ExpandedObjectHeader *eoh = NULL;
463
0
  int     header;
464
465
  /* Write header word. */
466
0
  if (isnull)
467
0
    header = -2;
468
0
  else if (typByVal)
469
0
    header = -1;
470
0
  else if (typLen == -1 &&
471
0
       VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
472
0
  {
473
0
    eoh = DatumGetEOHP(value);
474
0
    header = EOH_get_flat_size(eoh);
475
0
  }
476
0
  else
477
0
    header = datumGetSize(value, typByVal, typLen);
478
0
  memcpy(*start_address, &header, sizeof(int));
479
0
  *start_address += sizeof(int);
480
481
  /* If not null, write payload bytes. */
482
0
  if (!isnull)
483
0
  {
484
0
    if (typByVal)
485
0
    {
486
0
      memcpy(*start_address, &value, sizeof(Datum));
487
0
      *start_address += sizeof(Datum);
488
0
    }
489
0
    else if (eoh)
490
0
    {
491
0
      char     *tmp;
492
493
      /*
494
       * EOH_flatten_into expects the target address to be maxaligned,
495
       * so we can't store directly to *start_address.
496
       */
497
0
      tmp = (char *) palloc(header);
498
0
      EOH_flatten_into(eoh, tmp, header);
499
0
      memcpy(*start_address, tmp, header);
500
0
      *start_address += header;
501
502
      /* be tidy. */
503
0
      pfree(tmp);
504
0
    }
505
0
    else
506
0
    {
507
0
      memcpy(*start_address, DatumGetPointer(value), header);
508
0
      *start_address += header;
509
0
    }
510
0
  }
511
0
}
512
513
/*-------------------------------------------------------------------------
514
 * datumRestore
515
 *
516
 * Restore a possibly-NULL datum previously serialized by datumSerialize.
517
 * *start_address is updated according to the number of bytes consumed.
518
 *-------------------------------------------------------------------------
519
 */
520
Datum
521
datumRestore(char **start_address, bool *isnull)
522
0
{
523
0
  int     header;
524
0
  void     *d;
525
526
  /* Read header word. */
527
0
  memcpy(&header, *start_address, sizeof(int));
528
0
  *start_address += sizeof(int);
529
530
  /* If this datum is NULL, we can stop here. */
531
0
  if (header == -2)
532
0
  {
533
0
    *isnull = true;
534
0
    return (Datum) 0;
535
0
  }
536
537
  /* OK, datum is not null. */
538
0
  *isnull = false;
539
540
  /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */
541
0
  if (header == -1)
542
0
  {
543
0
    Datum   val;
544
545
0
    memcpy(&val, *start_address, sizeof(Datum));
546
0
    *start_address += sizeof(Datum);
547
0
    return val;
548
0
  }
549
550
  /* Pass-by-reference case; copy indicated number of bytes. */
551
0
  Assert(header > 0);
552
0
  d = palloc(header);
553
0
  memcpy(d, *start_address, header);
554
0
  *start_address += header;
555
0
  return PointerGetDatum(d);
556
0
}