Coverage Report

Created: 2025-09-27 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/postgres/src/backend/utils/adt/numutils.c
Line
Count
Source
1
/*-------------------------------------------------------------------------
2
 *
3
 * numutils.c
4
 *    utility functions for I/O of built-in numeric types.
5
 *
6
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7
 * Portions Copyright (c) 1994, Regents of the University of California
8
 *
9
 *
10
 * IDENTIFICATION
11
 *    src/backend/utils/adt/numutils.c
12
 *
13
 *-------------------------------------------------------------------------
14
 */
15
#include "postgres.h"
16
17
#include <math.h>
18
#include <limits.h>
19
#include <ctype.h>
20
21
#include "common/int.h"
22
#include "port/pg_bitutils.h"
23
#include "utils/builtins.h"
24
25
/*
26
 * A table of all two-digit numbers. This is used to speed up decimal digit
27
 * generation by copying pairs of digits into the final output.
28
 */
29
static const char DIGIT_TABLE[200] =
30
"00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
31
"10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
32
"20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
33
"30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
34
"40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
35
"50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
36
"60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
37
"70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
38
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
39
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
40
41
/*
42
 * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
43
 */
44
static inline int
45
decimalLength32(const uint32 v)
46
0
{
47
0
  int     t;
48
0
  static const uint32 PowersOfTen[] = {
49
0
    1, 10, 100,
50
0
    1000, 10000, 100000,
51
0
    1000000, 10000000, 100000000,
52
0
    1000000000
53
0
  };
54
55
  /*
56
   * Compute base-10 logarithm by dividing the base-2 logarithm by a
57
   * good-enough approximation of the base-2 logarithm of 10
58
   */
59
0
  t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
60
0
  return t + (v >= PowersOfTen[t]);
61
0
}
62
63
static inline int
64
decimalLength64(const uint64 v)
65
0
{
66
0
  int     t;
67
0
  static const uint64 PowersOfTen[] = {
68
0
    UINT64CONST(1), UINT64CONST(10),
69
0
    UINT64CONST(100), UINT64CONST(1000),
70
0
    UINT64CONST(10000), UINT64CONST(100000),
71
0
    UINT64CONST(1000000), UINT64CONST(10000000),
72
0
    UINT64CONST(100000000), UINT64CONST(1000000000),
73
0
    UINT64CONST(10000000000), UINT64CONST(100000000000),
74
0
    UINT64CONST(1000000000000), UINT64CONST(10000000000000),
75
0
    UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
76
0
    UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
77
0
    UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
78
0
  };
79
80
  /*
81
   * Compute base-10 logarithm by dividing the base-2 logarithm by a
82
   * good-enough approximation of the base-2 logarithm of 10
83
   */
84
0
  t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
85
0
  return t + (v >= PowersOfTen[t]);
86
0
}
87
88
static const int8 hexlookup[128] = {
89
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
90
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
91
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
92
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
93
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
94
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
95
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
96
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
97
};
98
99
/*
100
 * Convert input string to a signed 16 bit integer.  Input strings may be
101
 * expressed in base-10, hexadecimal, octal, or binary format, all of which
102
 * can be prefixed by an optional sign character, either '+' (the default) or
103
 * '-' for negative numbers.  Hex strings are recognized by the digits being
104
 * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
105
 * prefix.  The binary representation is recognized by the 0b or 0B prefix.
106
 *
107
 * Allows any number of leading or trailing whitespace characters.  Digits may
108
 * optionally be separated by a single underscore character.  These can only
109
 * come between digits and not before or after the digits.  Underscores have
110
 * no effect on the return value and are supported only to assist in improving
111
 * the human readability of the input strings.
112
 *
113
 * pg_strtoint16() will throw ereport() upon bad input format or overflow;
114
 * while pg_strtoint16_safe() instead returns such complaints in *escontext,
115
 * if it's an ErrorSaveContext.
116
 *
117
 * NB: Accumulate input as an unsigned number, to deal with two's complement
118
 * representation of the most negative number, which can't be represented as a
119
 * signed positive number.
120
 */
121
int16
122
pg_strtoint16(const char *s)
123
0
{
124
0
  return pg_strtoint16_safe(s, NULL);
125
0
}
126
127
int16
128
pg_strtoint16_safe(const char *s, Node *escontext)
129
0
{
130
0
  const char *ptr = s;
131
0
  const char *firstdigit;
132
0
  uint16    tmp = 0;
133
0
  bool    neg = false;
134
0
  unsigned char digit;
135
0
  int16   result;
136
137
  /*
138
   * The majority of cases are likely to be base-10 digits without any
139
   * underscore separator characters.  We'll first try to parse the string
140
   * with the assumption that's the case and only fall back on a slower
141
   * implementation which handles hex, octal and binary strings and
142
   * underscores if the fastpath version cannot parse the string.
143
   */
144
145
  /* leave it up to the slow path to look for leading spaces */
146
147
0
  if (*ptr == '-')
148
0
  {
149
0
    ptr++;
150
0
    neg = true;
151
0
  }
152
153
  /* a leading '+' is uncommon so leave that for the slow path */
154
155
  /* process the first digit */
156
0
  digit = (*ptr - '0');
157
158
  /*
159
   * Exploit unsigned arithmetic to save having to check both the upper and
160
   * lower bounds of the digit.
161
   */
162
0
  if (likely(digit < 10))
163
0
  {
164
0
    ptr++;
165
0
    tmp = digit;
166
0
  }
167
0
  else
168
0
  {
169
    /* we need at least one digit */
170
0
    goto slow;
171
0
  }
172
173
  /* process remaining digits */
174
0
  for (;;)
175
0
  {
176
0
    digit = (*ptr - '0');
177
178
0
    if (digit >= 10)
179
0
      break;
180
181
0
    ptr++;
182
183
0
    if (unlikely(tmp > -(PG_INT16_MIN / 10)))
184
0
      goto out_of_range;
185
186
0
    tmp = tmp * 10 + digit;
187
0
  }
188
189
  /* when the string does not end in a digit, let the slow path handle it */
190
0
  if (unlikely(*ptr != '\0'))
191
0
    goto slow;
192
193
0
  if (neg)
194
0
  {
195
0
    if (unlikely(pg_neg_u16_overflow(tmp, &result)))
196
0
      goto out_of_range;
197
0
    return result;
198
0
  }
199
200
0
  if (unlikely(tmp > PG_INT16_MAX))
201
0
    goto out_of_range;
202
203
0
  return (int16) tmp;
204
205
0
slow:
206
0
  tmp = 0;
207
0
  ptr = s;
208
  /* no need to reset neg */
209
210
  /* skip leading spaces */
211
0
  while (isspace((unsigned char) *ptr))
212
0
    ptr++;
213
214
  /* handle sign */
215
0
  if (*ptr == '-')
216
0
  {
217
0
    ptr++;
218
0
    neg = true;
219
0
  }
220
0
  else if (*ptr == '+')
221
0
    ptr++;
222
223
  /* process digits */
224
0
  if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
225
0
  {
226
0
    firstdigit = ptr += 2;
227
228
0
    for (;;)
229
0
    {
230
0
      if (isxdigit((unsigned char) *ptr))
231
0
      {
232
0
        if (unlikely(tmp > -(PG_INT16_MIN / 16)))
233
0
          goto out_of_range;
234
235
0
        tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
236
0
      }
237
0
      else if (*ptr == '_')
238
0
      {
239
        /* underscore must be followed by more digits */
240
0
        ptr++;
241
0
        if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
242
0
          goto invalid_syntax;
243
0
      }
244
0
      else
245
0
        break;
246
0
    }
247
0
  }
248
0
  else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
249
0
  {
250
0
    firstdigit = ptr += 2;
251
252
0
    for (;;)
253
0
    {
254
0
      if (*ptr >= '0' && *ptr <= '7')
255
0
      {
256
0
        if (unlikely(tmp > -(PG_INT16_MIN / 8)))
257
0
          goto out_of_range;
258
259
0
        tmp = tmp * 8 + (*ptr++ - '0');
260
0
      }
261
0
      else if (*ptr == '_')
262
0
      {
263
        /* underscore must be followed by more digits */
264
0
        ptr++;
265
0
        if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
266
0
          goto invalid_syntax;
267
0
      }
268
0
      else
269
0
        break;
270
0
    }
271
0
  }
272
0
  else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
273
0
  {
274
0
    firstdigit = ptr += 2;
275
276
0
    for (;;)
277
0
    {
278
0
      if (*ptr >= '0' && *ptr <= '1')
279
0
      {
280
0
        if (unlikely(tmp > -(PG_INT16_MIN / 2)))
281
0
          goto out_of_range;
282
283
0
        tmp = tmp * 2 + (*ptr++ - '0');
284
0
      }
285
0
      else if (*ptr == '_')
286
0
      {
287
        /* underscore must be followed by more digits */
288
0
        ptr++;
289
0
        if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
290
0
          goto invalid_syntax;
291
0
      }
292
0
      else
293
0
        break;
294
0
    }
295
0
  }
296
0
  else
297
0
  {
298
0
    firstdigit = ptr;
299
300
0
    for (;;)
301
0
    {
302
0
      if (*ptr >= '0' && *ptr <= '9')
303
0
      {
304
0
        if (unlikely(tmp > -(PG_INT16_MIN / 10)))
305
0
          goto out_of_range;
306
307
0
        tmp = tmp * 10 + (*ptr++ - '0');
308
0
      }
309
0
      else if (*ptr == '_')
310
0
      {
311
        /* underscore may not be first */
312
0
        if (unlikely(ptr == firstdigit))
313
0
          goto invalid_syntax;
314
        /* and it must be followed by more digits */
315
0
        ptr++;
316
0
        if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
317
0
          goto invalid_syntax;
318
0
      }
319
0
      else
320
0
        break;
321
0
    }
322
0
  }
323
324
  /* require at least one digit */
325
0
  if (unlikely(ptr == firstdigit))
326
0
    goto invalid_syntax;
327
328
  /* allow trailing whitespace, but not other trailing chars */
329
0
  while (isspace((unsigned char) *ptr))
330
0
    ptr++;
331
332
0
  if (unlikely(*ptr != '\0'))
333
0
    goto invalid_syntax;
334
335
0
  if (neg)
336
0
  {
337
0
    if (unlikely(pg_neg_u16_overflow(tmp, &result)))
338
0
      goto out_of_range;
339
0
    return result;
340
0
  }
341
342
0
  if (tmp > PG_INT16_MAX)
343
0
    goto out_of_range;
344
345
0
  return (int16) tmp;
346
347
0
out_of_range:
348
0
  ereturn(escontext, 0,
349
0
      (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
350
0
       errmsg("value \"%s\" is out of range for type %s",
351
0
          s, "smallint")));
352
353
0
invalid_syntax:
354
0
  ereturn(escontext, 0,
355
0
      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
356
0
       errmsg("invalid input syntax for type %s: \"%s\"",
357
0
          "smallint", s)));
358
0
}
359
360
/*
361
 * Convert input string to a signed 32 bit integer.  Input strings may be
362
 * expressed in base-10, hexadecimal, octal, or binary format, all of which
363
 * can be prefixed by an optional sign character, either '+' (the default) or
364
 * '-' for negative numbers.  Hex strings are recognized by the digits being
365
 * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
366
 * prefix.  The binary representation is recognized by the 0b or 0B prefix.
367
 *
368
 * Allows any number of leading or trailing whitespace characters.  Digits may
369
 * optionally be separated by a single underscore character.  These can only
370
 * come between digits and not before or after the digits.  Underscores have
371
 * no effect on the return value and are supported only to assist in improving
372
 * the human readability of the input strings.
373
 *
374
 * pg_strtoint32() will throw ereport() upon bad input format or overflow;
375
 * while pg_strtoint32_safe() instead returns such complaints in *escontext,
376
 * if it's an ErrorSaveContext.
377
 *
378
 * NB: Accumulate input as an unsigned number, to deal with two's complement
379
 * representation of the most negative number, which can't be represented as a
380
 * signed positive number.
381
 */
382
int32
383
pg_strtoint32(const char *s)
384
0
{
385
0
  return pg_strtoint32_safe(s, NULL);
386
0
}
387
388
int32
389
pg_strtoint32_safe(const char *s, Node *escontext)
390
220k
{
391
220k
  const char *ptr = s;
392
220k
  const char *firstdigit;
393
220k
  uint32    tmp = 0;
394
220k
  bool    neg = false;
395
220k
  unsigned char digit;
396
220k
  int32   result;
397
398
  /*
399
   * The majority of cases are likely to be base-10 digits without any
400
   * underscore separator characters.  We'll first try to parse the string
401
   * with the assumption that's the case and only fall back on a slower
402
   * implementation which handles hex, octal and binary strings and
403
   * underscores if the fastpath version cannot parse the string.
404
   */
405
406
  /* leave it up to the slow path to look for leading spaces */
407
408
220k
  if (*ptr == '-')
409
0
  {
410
0
    ptr++;
411
0
    neg = true;
412
0
  }
413
414
  /* a leading '+' is uncommon so leave that for the slow path */
415
416
  /* process the first digit */
417
220k
  digit = (*ptr - '0');
418
419
  /*
420
   * Exploit unsigned arithmetic to save having to check both the upper and
421
   * lower bounds of the digit.
422
   */
423
220k
  if (likely(digit < 10))
424
220k
  {
425
220k
    ptr++;
426
220k
    tmp = digit;
427
220k
  }
428
0
  else
429
0
  {
430
    /* we need at least one digit */
431
0
    goto slow;
432
0
  }
433
434
  /* process remaining digits */
435
220k
  for (;;)
436
799k
  {
437
799k
    digit = (*ptr - '0');
438
439
799k
    if (digit >= 10)
440
220k
      break;
441
442
579k
    ptr++;
443
444
579k
    if (unlikely(tmp > -(PG_INT32_MIN / 10)))
445
446
      goto out_of_range;
446
447
578k
    tmp = tmp * 10 + digit;
448
578k
  }
449
450
  /* when the string does not end in a digit, let the slow path handle it */
451
220k
  if (unlikely(*ptr != '\0'))
452
3.00k
    goto slow;
453
454
217k
  if (neg)
455
0
  {
456
0
    if (unlikely(pg_neg_u32_overflow(tmp, &result)))
457
0
      goto out_of_range;
458
0
    return result;
459
0
  }
460
461
217k
  if (unlikely(tmp > PG_INT32_MAX))
462
335
    goto out_of_range;
463
464
216k
  return (int32) tmp;
465
466
3.00k
slow:
467
3.00k
  tmp = 0;
468
3.00k
  ptr = s;
469
  /* no need to reset neg */
470
471
  /* skip leading spaces */
472
3.00k
  while (isspace((unsigned char) *ptr))
473
0
    ptr++;
474
475
  /* handle sign */
476
3.00k
  if (*ptr == '-')
477
0
  {
478
0
    ptr++;
479
0
    neg = true;
480
0
  }
481
3.00k
  else if (*ptr == '+')
482
0
    ptr++;
483
484
  /* process digits */
485
3.00k
  if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
486
642
  {
487
642
    firstdigit = ptr += 2;
488
489
642
    for (;;)
490
3.95k
    {
491
3.95k
      if (isxdigit((unsigned char) *ptr))
492
3.31k
      {
493
3.31k
        if (unlikely(tmp > -(PG_INT32_MIN / 16)))
494
268
          goto out_of_range;
495
496
3.05k
        tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
497
3.05k
      }
498
637
      else if (*ptr == '_')
499
263
      {
500
        /* underscore must be followed by more digits */
501
263
        ptr++;
502
263
        if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
503
0
          goto invalid_syntax;
504
263
      }
505
374
      else
506
374
        break;
507
3.95k
    }
508
642
  }
509
2.36k
  else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
510
633
  {
511
633
    firstdigit = ptr += 2;
512
513
633
    for (;;)
514
4.32k
    {
515
4.32k
      if (*ptr >= '0' && *ptr <= '7')
516
3.53k
      {
517
3.53k
        if (unlikely(tmp > -(PG_INT32_MIN / 8)))
518
219
          goto out_of_range;
519
520
3.31k
        tmp = tmp * 8 + (*ptr++ - '0');
521
3.31k
      }
522
788
      else if (*ptr == '_')
523
374
      {
524
        /* underscore must be followed by more digits */
525
374
        ptr++;
526
374
        if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
527
0
          goto invalid_syntax;
528
374
      }
529
414
      else
530
414
        break;
531
4.32k
    }
532
633
  }
533
1.72k
  else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
534
615
  {
535
615
    firstdigit = ptr += 2;
536
537
615
    for (;;)
538
7.91k
    {
539
7.91k
      if (*ptr >= '0' && *ptr <= '1')
540
7.22k
      {
541
7.22k
        if (unlikely(tmp > -(PG_INT32_MIN / 2)))
542
201
          goto out_of_range;
543
544
7.02k
        tmp = tmp * 2 + (*ptr++ - '0');
545
7.02k
      }
546
692
      else if (*ptr == '_')
547
278
      {
548
        /* underscore must be followed by more digits */
549
278
        ptr++;
550
278
        if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
551
0
          goto invalid_syntax;
552
278
      }
553
414
      else
554
414
        break;
555
7.91k
    }
556
615
  }
557
1.11k
  else
558
1.11k
  {
559
1.11k
    firstdigit = ptr;
560
561
1.11k
    for (;;)
562
9.71k
    {
563
9.71k
      if (*ptr >= '0' && *ptr <= '9')
564
7.64k
      {
565
7.64k
        if (unlikely(tmp > -(PG_INT32_MIN / 10)))
566
243
          goto out_of_range;
567
568
7.39k
        tmp = tmp * 10 + (*ptr++ - '0');
569
7.39k
      }
570
2.07k
      else if (*ptr == '_')
571
1.19k
      {
572
        /* underscore may not be first */
573
1.19k
        if (unlikely(ptr == firstdigit))
574
0
          goto invalid_syntax;
575
        /* and it must be followed by more digits */
576
1.19k
        ptr++;
577
1.19k
        if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
578
0
          goto invalid_syntax;
579
1.19k
      }
580
871
      else
581
871
        break;
582
9.71k
    }
583
1.11k
  }
584
585
  /* require at least one digit */
586
2.07k
  if (unlikely(ptr == firstdigit))
587
0
    goto invalid_syntax;
588
589
  /* allow trailing whitespace, but not other trailing chars */
590
2.07k
  while (isspace((unsigned char) *ptr))
591
0
    ptr++;
592
593
2.07k
  if (unlikely(*ptr != '\0'))
594
0
    goto invalid_syntax;
595
596
2.07k
  if (neg)
597
0
  {
598
0
    if (unlikely(pg_neg_u32_overflow(tmp, &result)))
599
0
      goto out_of_range;
600
0
    return result;
601
0
  }
602
603
2.07k
  if (tmp > PG_INT32_MAX)
604
276
    goto out_of_range;
605
606
1.79k
  return (int32) tmp;
607
608
1.98k
out_of_range:
609
1.98k
  ereturn(escontext, 0,
610
0
      (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
611
0
       errmsg("value \"%s\" is out of range for type %s",
612
0
          s, "integer")));
613
614
0
invalid_syntax:
615
0
  ereturn(escontext, 0,
616
0
      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
617
0
       errmsg("invalid input syntax for type %s: \"%s\"",
618
0
          "integer", s)));
619
0
}
620
621
/*
622
 * Convert input string to a signed 64 bit integer.  Input strings may be
623
 * expressed in base-10, hexadecimal, octal, or binary format, all of which
624
 * can be prefixed by an optional sign character, either '+' (the default) or
625
 * '-' for negative numbers.  Hex strings are recognized by the digits being
626
 * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
627
 * prefix.  The binary representation is recognized by the 0b or 0B prefix.
628
 *
629
 * Allows any number of leading or trailing whitespace characters.  Digits may
630
 * optionally be separated by a single underscore character.  These can only
631
 * come between digits and not before or after the digits.  Underscores have
632
 * no effect on the return value and are supported only to assist in improving
633
 * the human readability of the input strings.
634
 *
635
 * pg_strtoint64() will throw ereport() upon bad input format or overflow;
636
 * while pg_strtoint64_safe() instead returns such complaints in *escontext,
637
 * if it's an ErrorSaveContext.
638
 *
639
 * NB: Accumulate input as an unsigned number, to deal with two's complement
640
 * representation of the most negative number, which can't be represented as a
641
 * signed positive number.
642
 */
643
int64
644
pg_strtoint64(const char *s)
645
0
{
646
0
  return pg_strtoint64_safe(s, NULL);
647
0
}
648
649
int64
650
pg_strtoint64_safe(const char *s, Node *escontext)
651
0
{
652
0
  const char *ptr = s;
653
0
  const char *firstdigit;
654
0
  uint64    tmp = 0;
655
0
  bool    neg = false;
656
0
  unsigned char digit;
657
0
  int64   result;
658
659
  /*
660
   * The majority of cases are likely to be base-10 digits without any
661
   * underscore separator characters.  We'll first try to parse the string
662
   * with the assumption that's the case and only fall back on a slower
663
   * implementation which handles hex, octal and binary strings and
664
   * underscores if the fastpath version cannot parse the string.
665
   */
666
667
  /* leave it up to the slow path to look for leading spaces */
668
669
0
  if (*ptr == '-')
670
0
  {
671
0
    ptr++;
672
0
    neg = true;
673
0
  }
674
675
  /* a leading '+' is uncommon so leave that for the slow path */
676
677
  /* process the first digit */
678
0
  digit = (*ptr - '0');
679
680
  /*
681
   * Exploit unsigned arithmetic to save having to check both the upper and
682
   * lower bounds of the digit.
683
   */
684
0
  if (likely(digit < 10))
685
0
  {
686
0
    ptr++;
687
0
    tmp = digit;
688
0
  }
689
0
  else
690
0
  {
691
    /* we need at least one digit */
692
0
    goto slow;
693
0
  }
694
695
  /* process remaining digits */
696
0
  for (;;)
697
0
  {
698
0
    digit = (*ptr - '0');
699
700
0
    if (digit >= 10)
701
0
      break;
702
703
0
    ptr++;
704
705
0
    if (unlikely(tmp > -(PG_INT64_MIN / 10)))
706
0
      goto out_of_range;
707
708
0
    tmp = tmp * 10 + digit;
709
0
  }
710
711
  /* when the string does not end in a digit, let the slow path handle it */
712
0
  if (unlikely(*ptr != '\0'))
713
0
    goto slow;
714
715
0
  if (neg)
716
0
  {
717
0
    if (unlikely(pg_neg_u64_overflow(tmp, &result)))
718
0
      goto out_of_range;
719
0
    return result;
720
0
  }
721
722
0
  if (unlikely(tmp > PG_INT64_MAX))
723
0
    goto out_of_range;
724
725
0
  return (int64) tmp;
726
727
0
slow:
728
0
  tmp = 0;
729
0
  ptr = s;
730
  /* no need to reset neg */
731
732
  /* skip leading spaces */
733
0
  while (isspace((unsigned char) *ptr))
734
0
    ptr++;
735
736
  /* handle sign */
737
0
  if (*ptr == '-')
738
0
  {
739
0
    ptr++;
740
0
    neg = true;
741
0
  }
742
0
  else if (*ptr == '+')
743
0
    ptr++;
744
745
  /* process digits */
746
0
  if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
747
0
  {
748
0
    firstdigit = ptr += 2;
749
750
0
    for (;;)
751
0
    {
752
0
      if (isxdigit((unsigned char) *ptr))
753
0
      {
754
0
        if (unlikely(tmp > -(PG_INT64_MIN / 16)))
755
0
          goto out_of_range;
756
757
0
        tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
758
0
      }
759
0
      else if (*ptr == '_')
760
0
      {
761
        /* underscore must be followed by more digits */
762
0
        ptr++;
763
0
        if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
764
0
          goto invalid_syntax;
765
0
      }
766
0
      else
767
0
        break;
768
0
    }
769
0
  }
770
0
  else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
771
0
  {
772
0
    firstdigit = ptr += 2;
773
774
0
    for (;;)
775
0
    {
776
0
      if (*ptr >= '0' && *ptr <= '7')
777
0
      {
778
0
        if (unlikely(tmp > -(PG_INT64_MIN / 8)))
779
0
          goto out_of_range;
780
781
0
        tmp = tmp * 8 + (*ptr++ - '0');
782
0
      }
783
0
      else if (*ptr == '_')
784
0
      {
785
        /* underscore must be followed by more digits */
786
0
        ptr++;
787
0
        if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
788
0
          goto invalid_syntax;
789
0
      }
790
0
      else
791
0
        break;
792
0
    }
793
0
  }
794
0
  else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
795
0
  {
796
0
    firstdigit = ptr += 2;
797
798
0
    for (;;)
799
0
    {
800
0
      if (*ptr >= '0' && *ptr <= '1')
801
0
      {
802
0
        if (unlikely(tmp > -(PG_INT64_MIN / 2)))
803
0
          goto out_of_range;
804
805
0
        tmp = tmp * 2 + (*ptr++ - '0');
806
0
      }
807
0
      else if (*ptr == '_')
808
0
      {
809
        /* underscore must be followed by more digits */
810
0
        ptr++;
811
0
        if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
812
0
          goto invalid_syntax;
813
0
      }
814
0
      else
815
0
        break;
816
0
    }
817
0
  }
818
0
  else
819
0
  {
820
0
    firstdigit = ptr;
821
822
0
    for (;;)
823
0
    {
824
0
      if (*ptr >= '0' && *ptr <= '9')
825
0
      {
826
0
        if (unlikely(tmp > -(PG_INT64_MIN / 10)))
827
0
          goto out_of_range;
828
829
0
        tmp = tmp * 10 + (*ptr++ - '0');
830
0
      }
831
0
      else if (*ptr == '_')
832
0
      {
833
        /* underscore may not be first */
834
0
        if (unlikely(ptr == firstdigit))
835
0
          goto invalid_syntax;
836
        /* and it must be followed by more digits */
837
0
        ptr++;
838
0
        if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
839
0
          goto invalid_syntax;
840
0
      }
841
0
      else
842
0
        break;
843
0
    }
844
0
  }
845
846
  /* require at least one digit */
847
0
  if (unlikely(ptr == firstdigit))
848
0
    goto invalid_syntax;
849
850
  /* allow trailing whitespace, but not other trailing chars */
851
0
  while (isspace((unsigned char) *ptr))
852
0
    ptr++;
853
854
0
  if (unlikely(*ptr != '\0'))
855
0
    goto invalid_syntax;
856
857
0
  if (neg)
858
0
  {
859
0
    if (unlikely(pg_neg_u64_overflow(tmp, &result)))
860
0
      goto out_of_range;
861
0
    return result;
862
0
  }
863
864
0
  if (tmp > PG_INT64_MAX)
865
0
    goto out_of_range;
866
867
0
  return (int64) tmp;
868
869
0
out_of_range:
870
0
  ereturn(escontext, 0,
871
0
      (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
872
0
       errmsg("value \"%s\" is out of range for type %s",
873
0
          s, "bigint")));
874
875
0
invalid_syntax:
876
0
  ereturn(escontext, 0,
877
0
      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
878
0
       errmsg("invalid input syntax for type %s: \"%s\"",
879
0
          "bigint", s)));
880
0
}
881
882
/*
883
 * Convert input string to an unsigned 32 bit integer.
884
 *
885
 * Allows any number of leading or trailing whitespace characters.
886
 *
887
 * If endloc isn't NULL, store a pointer to the rest of the string there,
888
 * so that caller can parse the rest.  Otherwise, it's an error if anything
889
 * but whitespace follows.
890
 *
891
 * typname is what is reported in error messages.
892
 *
893
 * If escontext points to an ErrorSaveContext node, that is filled instead
894
 * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
895
 * to detect errors.
896
 */
897
uint32
898
uint32in_subr(const char *s, char **endloc,
899
        const char *typname, Node *escontext)
900
0
{
901
0
  uint32    result;
902
0
  unsigned long cvt;
903
0
  char     *endptr;
904
905
0
  errno = 0;
906
0
  cvt = strtoul(s, &endptr, 0);
907
908
  /*
909
   * strtoul() normally only sets ERANGE.  On some systems it may also set
910
   * EINVAL, which simply means it couldn't parse the input string.  Be sure
911
   * to report that the same way as the standard error indication (that
912
   * endptr == s).
913
   */
914
0
  if ((errno && errno != ERANGE) || endptr == s)
915
0
    ereturn(escontext, 0,
916
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
917
0
         errmsg("invalid input syntax for type %s: \"%s\"",
918
0
            typname, s)));
919
920
0
  if (errno == ERANGE)
921
0
    ereturn(escontext, 0,
922
0
        (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
923
0
         errmsg("value \"%s\" is out of range for type %s",
924
0
            s, typname)));
925
926
0
  if (endloc)
927
0
  {
928
    /* caller wants to deal with rest of string */
929
0
    *endloc = endptr;
930
0
  }
931
0
  else
932
0
  {
933
    /* allow only whitespace after number */
934
0
    while (*endptr && isspace((unsigned char) *endptr))
935
0
      endptr++;
936
0
    if (*endptr)
937
0
      ereturn(escontext, 0,
938
0
          (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
939
0
           errmsg("invalid input syntax for type %s: \"%s\"",
940
0
              typname, s)));
941
0
  }
942
943
0
  result = (uint32) cvt;
944
945
  /*
946
   * Cope with possibility that unsigned long is wider than uint32, in which
947
   * case strtoul will not raise an error for some values that are out of
948
   * the range of uint32.
949
   *
950
   * For backwards compatibility, we want to accept inputs that are given
951
   * with a minus sign, so allow the input value if it matches after either
952
   * signed or unsigned extension to long.
953
   *
954
   * To ensure consistent results on 32-bit and 64-bit platforms, make sure
955
   * the error message is the same as if strtoul() had returned ERANGE.
956
   */
957
0
#if PG_UINT32_MAX != ULONG_MAX
958
0
  if (cvt != (unsigned long) result &&
959
0
    cvt != (unsigned long) ((int) result))
960
0
    ereturn(escontext, 0,
961
0
        (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
962
0
         errmsg("value \"%s\" is out of range for type %s",
963
0
            s, typname)));
964
0
#endif
965
966
0
  return result;
967
0
}
968
969
/*
970
 * Convert input string to an unsigned 64 bit integer.
971
 *
972
 * Allows any number of leading or trailing whitespace characters.
973
 *
974
 * If endloc isn't NULL, store a pointer to the rest of the string there,
975
 * so that caller can parse the rest.  Otherwise, it's an error if anything
976
 * but whitespace follows.
977
 *
978
 * typname is what is reported in error messages.
979
 *
980
 * If escontext points to an ErrorSaveContext node, that is filled instead
981
 * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
982
 * to detect errors.
983
 */
984
uint64
985
uint64in_subr(const char *s, char **endloc,
986
        const char *typname, Node *escontext)
987
0
{
988
0
  uint64    result;
989
0
  char     *endptr;
990
991
0
  errno = 0;
992
0
  result = strtou64(s, &endptr, 0);
993
994
  /*
995
   * strtoul[l] normally only sets ERANGE.  On some systems it may also set
996
   * EINVAL, which simply means it couldn't parse the input string.  Be sure
997
   * to report that the same way as the standard error indication (that
998
   * endptr == s).
999
   */
1000
0
  if ((errno && errno != ERANGE) || endptr == s)
1001
0
    ereturn(escontext, 0,
1002
0
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1003
0
         errmsg("invalid input syntax for type %s: \"%s\"",
1004
0
            typname, s)));
1005
1006
0
  if (errno == ERANGE)
1007
0
    ereturn(escontext, 0,
1008
0
        (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1009
0
         errmsg("value \"%s\" is out of range for type %s",
1010
0
            s, typname)));
1011
1012
0
  if (endloc)
1013
0
  {
1014
    /* caller wants to deal with rest of string */
1015
0
    *endloc = endptr;
1016
0
  }
1017
0
  else
1018
0
  {
1019
    /* allow only whitespace after number */
1020
0
    while (*endptr && isspace((unsigned char) *endptr))
1021
0
      endptr++;
1022
0
    if (*endptr)
1023
0
      ereturn(escontext, 0,
1024
0
          (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1025
0
           errmsg("invalid input syntax for type %s: \"%s\"",
1026
0
              typname, s)));
1027
0
  }
1028
1029
0
  return result;
1030
0
}
1031
1032
/*
1033
 * pg_itoa: converts a signed 16-bit integer to its string representation
1034
 * and returns strlen(a).
1035
 *
1036
 * Caller must ensure that 'a' points to enough memory to hold the result
1037
 * (at least 7 bytes, counting a leading sign and trailing NUL).
1038
 *
1039
 * It doesn't seem worth implementing this separately.
1040
 */
1041
int
1042
pg_itoa(int16 i, char *a)
1043
0
{
1044
0
  return pg_ltoa((int32) i, a);
1045
0
}
1046
1047
/*
1048
 * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
1049
 * not NUL-terminated, and returns the length of that string representation
1050
 *
1051
 * Caller must ensure that 'a' points to enough memory to hold the result (at
1052
 * least 10 bytes)
1053
 */
1054
int
1055
pg_ultoa_n(uint32 value, char *a)
1056
0
{
1057
0
  int     olength,
1058
0
        i = 0;
1059
1060
  /* Degenerate case */
1061
0
  if (value == 0)
1062
0
  {
1063
0
    *a = '0';
1064
0
    return 1;
1065
0
  }
1066
1067
0
  olength = decimalLength32(value);
1068
1069
  /* Compute the result string. */
1070
0
  while (value >= 10000)
1071
0
  {
1072
0
    const uint32 c = value - 10000 * (value / 10000);
1073
0
    const uint32 c0 = (c % 100) << 1;
1074
0
    const uint32 c1 = (c / 100) << 1;
1075
1076
0
    char     *pos = a + olength - i;
1077
1078
0
    value /= 10000;
1079
1080
0
    memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1081
0
    memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1082
0
    i += 4;
1083
0
  }
1084
0
  if (value >= 100)
1085
0
  {
1086
0
    const uint32 c = (value % 100) << 1;
1087
1088
0
    char     *pos = a + olength - i;
1089
1090
0
    value /= 100;
1091
1092
0
    memcpy(pos - 2, DIGIT_TABLE + c, 2);
1093
0
    i += 2;
1094
0
  }
1095
0
  if (value >= 10)
1096
0
  {
1097
0
    const uint32 c = value << 1;
1098
1099
0
    char     *pos = a + olength - i;
1100
1101
0
    memcpy(pos - 2, DIGIT_TABLE + c, 2);
1102
0
  }
1103
0
  else
1104
0
  {
1105
0
    *a = (char) ('0' + value);
1106
0
  }
1107
1108
0
  return olength;
1109
0
}
1110
1111
/*
1112
 * pg_ltoa: converts a signed 32-bit integer to its string representation and
1113
 * returns strlen(a).
1114
 *
1115
 * It is the caller's responsibility to ensure that a is at least 12 bytes long,
1116
 * which is enough room to hold a minus sign, a maximally long int32, and the
1117
 * above terminating NUL.
1118
 */
1119
int
1120
pg_ltoa(int32 value, char *a)
1121
0
{
1122
0
  uint32    uvalue = (uint32) value;
1123
0
  int     len = 0;
1124
1125
0
  if (value < 0)
1126
0
  {
1127
0
    uvalue = (uint32) 0 - uvalue;
1128
0
    a[len++] = '-';
1129
0
  }
1130
0
  len += pg_ultoa_n(uvalue, a + len);
1131
0
  a[len] = '\0';
1132
0
  return len;
1133
0
}
1134
1135
/*
1136
 * Get the decimal representation, not NUL-terminated, and return the length of
1137
 * same.  Caller must ensure that a points to at least MAXINT8LEN bytes.
1138
 */
1139
int
1140
pg_ulltoa_n(uint64 value, char *a)
1141
0
{
1142
0
  int     olength,
1143
0
        i = 0;
1144
0
  uint32    value2;
1145
1146
  /* Degenerate case */
1147
0
  if (value == 0)
1148
0
  {
1149
0
    *a = '0';
1150
0
    return 1;
1151
0
  }
1152
1153
0
  olength = decimalLength64(value);
1154
1155
  /* Compute the result string. */
1156
0
  while (value >= 100000000)
1157
0
  {
1158
0
    const uint64 q = value / 100000000;
1159
0
    uint32    value3 = (uint32) (value - 100000000 * q);
1160
1161
0
    const uint32 c = value3 % 10000;
1162
0
    const uint32 d = value3 / 10000;
1163
0
    const uint32 c0 = (c % 100) << 1;
1164
0
    const uint32 c1 = (c / 100) << 1;
1165
0
    const uint32 d0 = (d % 100) << 1;
1166
0
    const uint32 d1 = (d / 100) << 1;
1167
1168
0
    char     *pos = a + olength - i;
1169
1170
0
    value = q;
1171
1172
0
    memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1173
0
    memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1174
0
    memcpy(pos - 6, DIGIT_TABLE + d0, 2);
1175
0
    memcpy(pos - 8, DIGIT_TABLE + d1, 2);
1176
0
    i += 8;
1177
0
  }
1178
1179
  /* Switch to 32-bit for speed */
1180
0
  value2 = (uint32) value;
1181
1182
0
  if (value2 >= 10000)
1183
0
  {
1184
0
    const uint32 c = value2 - 10000 * (value2 / 10000);
1185
0
    const uint32 c0 = (c % 100) << 1;
1186
0
    const uint32 c1 = (c / 100) << 1;
1187
1188
0
    char     *pos = a + olength - i;
1189
1190
0
    value2 /= 10000;
1191
1192
0
    memcpy(pos - 2, DIGIT_TABLE + c0, 2);
1193
0
    memcpy(pos - 4, DIGIT_TABLE + c1, 2);
1194
0
    i += 4;
1195
0
  }
1196
0
  if (value2 >= 100)
1197
0
  {
1198
0
    const uint32 c = (value2 % 100) << 1;
1199
0
    char     *pos = a + olength - i;
1200
1201
0
    value2 /= 100;
1202
1203
0
    memcpy(pos - 2, DIGIT_TABLE + c, 2);
1204
0
    i += 2;
1205
0
  }
1206
0
  if (value2 >= 10)
1207
0
  {
1208
0
    const uint32 c = value2 << 1;
1209
0
    char     *pos = a + olength - i;
1210
1211
0
    memcpy(pos - 2, DIGIT_TABLE + c, 2);
1212
0
  }
1213
0
  else
1214
0
    *a = (char) ('0' + value2);
1215
1216
0
  return olength;
1217
0
}
1218
1219
/*
1220
 * pg_lltoa: converts a signed 64-bit integer to its string representation and
1221
 * returns strlen(a).
1222
 *
1223
 * Caller must ensure that 'a' points to enough memory to hold the result
1224
 * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
1225
 */
1226
int
1227
pg_lltoa(int64 value, char *a)
1228
0
{
1229
0
  uint64    uvalue = value;
1230
0
  int     len = 0;
1231
1232
0
  if (value < 0)
1233
0
  {
1234
0
    uvalue = (uint64) 0 - uvalue;
1235
0
    a[len++] = '-';
1236
0
  }
1237
1238
0
  len += pg_ulltoa_n(uvalue, a + len);
1239
0
  a[len] = '\0';
1240
0
  return len;
1241
0
}
1242
1243
1244
/*
1245
 * pg_ultostr_zeropad
1246
 *    Converts 'value' into a decimal string representation stored at 'str'.
1247
 *    'minwidth' specifies the minimum width of the result; any extra space
1248
 *    is filled up by prefixing the number with zeros.
1249
 *
1250
 * Returns the ending address of the string result (the last character written
1251
 * plus 1).  Note that no NUL terminator is written.
1252
 *
1253
 * The intended use-case for this function is to build strings that contain
1254
 * multiple individual numbers, for example:
1255
 *
1256
 *  str = pg_ultostr_zeropad(str, hours, 2);
1257
 *  *str++ = ':';
1258
 *  str = pg_ultostr_zeropad(str, mins, 2);
1259
 *  *str++ = ':';
1260
 *  str = pg_ultostr_zeropad(str, secs, 2);
1261
 *  *str = '\0';
1262
 *
1263
 * Note: Caller must ensure that 'str' points to enough memory to hold the
1264
 * result.
1265
 */
1266
char *
1267
pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
1268
0
{
1269
0
  int     len;
1270
1271
0
  Assert(minwidth > 0);
1272
1273
0
  if (value < 100 && minwidth == 2) /* Short cut for common case */
1274
0
  {
1275
0
    memcpy(str, DIGIT_TABLE + value * 2, 2);
1276
0
    return str + 2;
1277
0
  }
1278
1279
0
  len = pg_ultoa_n(value, str);
1280
0
  if (len >= minwidth)
1281
0
    return str + len;
1282
1283
0
  memmove(str + minwidth - len, str, len);
1284
0
  memset(str, '0', minwidth - len);
1285
0
  return str + minwidth;
1286
0
}
1287
1288
/*
1289
 * pg_ultostr
1290
 *    Converts 'value' into a decimal string representation stored at 'str'.
1291
 *
1292
 * Returns the ending address of the string result (the last character written
1293
 * plus 1).  Note that no NUL terminator is written.
1294
 *
1295
 * The intended use-case for this function is to build strings that contain
1296
 * multiple individual numbers, for example:
1297
 *
1298
 *  str = pg_ultostr(str, a);
1299
 *  *str++ = ' ';
1300
 *  str = pg_ultostr(str, b);
1301
 *  *str = '\0';
1302
 *
1303
 * Note: Caller must ensure that 'str' points to enough memory to hold the
1304
 * result.
1305
 */
1306
char *
1307
pg_ultostr(char *str, uint32 value)
1308
0
{
1309
0
  int     len = pg_ultoa_n(value, str);
1310
1311
0
  return str + len;
1312
0
}