/src/postgres/src/backend/utils/adt/numutils.c

Source
/*-------------------------------------------------------------------------
 *
 * numutils.c
 *    utility functions for I/O of built-in numeric types.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/utils/adt/numutils.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>
#include <limits.h>
#include <ctype.h>

#include "common/int.h"
#include "port/pg_bitutils.h"
#include "utils/builtins.h"

/*
 * A table of all two-digit numbers. This is used to speed up decimal digit
 * generation by copying pairs of digits into the final output.
 */
static const char DIGIT_TABLE[200] =
"00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
"10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
"20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
"30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
"40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
"50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
"60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
"70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";

/*
 * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
 */
static inline int
decimalLength32(const uint32 v)
{
  int     t;
  static const uint32 PowersOfTen[] = {
    1, 10, 100,
    1000, 10000, 100000,
    1000000, 10000000, 100000000,
    1000000000
  };

  /*
   * Compute base-10 logarithm by dividing the base-2 logarithm by a
   * good-enough approximation of the base-2 logarithm of 10
   */
  t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
  return t + (v >= PowersOfTen[t]);
}

static inline int
decimalLength64(const uint64 v)
{
  int     t;
  static const uint64 PowersOfTen[] = {
    UINT64CONST(1), UINT64CONST(10),
    UINT64CONST(100), UINT64CONST(1000),
    UINT64CONST(10000), UINT64CONST(100000),
    UINT64CONST(1000000), UINT64CONST(10000000),
    UINT64CONST(100000000), UINT64CONST(1000000000),
    UINT64CONST(10000000000), UINT64CONST(100000000000),
    UINT64CONST(1000000000000), UINT64CONST(10000000000000),
    UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
    UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
    UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
  };

  /*
   * Compute base-10 logarithm by dividing the base-2 logarithm by a
   * good-enough approximation of the base-2 logarithm of 10
   */
  t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
  return t + (v >= PowersOfTen[t]);
}

static const int8 hexlookup[128] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

/*
 * Convert input string to a signed 16 bit integer.  Input strings may be
 * expressed in base-10, hexadecimal, octal, or binary format, all of which
 * can be prefixed by an optional sign character, either '+' (the default) or
 * '-' for negative numbers.  Hex strings are recognized by the digits being
 * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
 * prefix.  The binary representation is recognized by the 0b or 0B prefix.
 *
 * Allows any number of leading or trailing whitespace characters.  Digits may
 * optionally be separated by a single underscore character.  These can only
 * come between digits and not before or after the digits.  Underscores have
 * no effect on the return value and are supported only to assist in improving
 * the human readability of the input strings.
 *
 * pg_strtoint16() will throw ereport() upon bad input format or overflow;
 * while pg_strtoint16_safe() instead returns such complaints in *escontext,
 * if it's an ErrorSaveContext.
*
 * NB: Accumulate input as an unsigned number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
 * signed positive number.
 */
int16
pg_strtoint16(const char *s)
{
  return pg_strtoint16_safe(s, NULL);
}

int16
pg_strtoint16_safe(const char *s, Node *escontext)
{
  const char *ptr = s;
  const char *firstdigit;
  uint16    tmp = 0;
  bool    neg = false;
  unsigned char digit;
  int16   result;

  /*
   * The majority of cases are likely to be base-10 digits without any
   * underscore separator characters.  We'll first try to parse the string
   * with the assumption that's the case and only fallback on a slower
   * implementation which handles hex, octal and binary strings and
   * underscores if the fastpath version cannot parse the string.
   */

  /* leave it up to the slow path to look for leading spaces */

  if (*ptr == '-')
  {
    ptr++;
    neg = true;
  }

  /* a leading '+' is uncommon so leave that for the slow path */

  /* process the first digit */
  digit = (*ptr - '0');

  /*
   * Exploit unsigned arithmetic to save having to check both the upper and
   * lower bounds of the digit.
   */
  if (likely(digit < 10))
  {
    ptr++;
    tmp = digit;
  }
  else
  {
    /* we need at least one digit */
    goto slow;
  }

  /* process remaining digits */
  for (;;)
  {
    digit = (*ptr - '0');

    if (digit >= 10)
      break;

    ptr++;

    if (unlikely(tmp > -(PG_INT16_MIN / 10)))
      goto out_of_range;

    tmp = tmp * 10 + digit;
  }

  /* when the string does not end in a digit, let the slow path handle it */
  if (unlikely(*ptr != '\0'))
    goto slow;

  if (neg)
  {
    if (unlikely(pg_neg_u16_overflow(tmp, &result)))
      goto out_of_range;
    return result;
  }

  if (unlikely(tmp > PG_INT16_MAX))
    goto out_of_range;

  return (int16) tmp;

slow:
  tmp = 0;
  ptr = s;
  /* no need to reset neg */

  /* skip leading spaces */
  while (isspace((unsigned char) *ptr))
    ptr++;

  /* handle sign */
  if (*ptr == '-')
  {
    ptr++;
    neg = true;
  }
  else if (*ptr == '+')
    ptr++;

  /* process digits */
  if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (isxdigit((unsigned char) *ptr))
      {
        if (unlikely(tmp > -(PG_INT16_MIN / 16)))
          goto out_of_range;

        tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '7')
      {
        if (unlikely(tmp > -(PG_INT16_MIN / 8)))
          goto out_of_range;

        tmp = tmp * 8 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '1')
      {
        if (unlikely(tmp > -(PG_INT16_MIN / 2)))
          goto out_of_range;

        tmp = tmp * 2 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else
  {
    firstdigit = ptr;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '9')
      {
        if (unlikely(tmp > -(PG_INT16_MIN / 10)))
          goto out_of_range;

        tmp = tmp * 10 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore may not be first */
        if (unlikely(ptr == firstdigit))
          goto invalid_syntax;
        /* and it must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
          goto invalid_syntax;
      }
      else
        break;
    }
  }

  /* require at least one digit */
  if (unlikely(ptr == firstdigit))
    goto invalid_syntax;

  /* allow trailing whitespace, but not other trailing chars */
  while (isspace((unsigned char) *ptr))
    ptr++;

  if (unlikely(*ptr != '\0'))
    goto invalid_syntax;

  if (neg)
  {
    if (unlikely(pg_neg_u16_overflow(tmp, &result)))
      goto out_of_range;
    return result;
  }

  if (tmp > PG_INT16_MAX)
    goto out_of_range;

  return (int16) tmp;

out_of_range:
  ereturn(escontext, 0,
      (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
       errmsg("value \"%s\" is out of range for type %s",
          s, "smallint")));

invalid_syntax:
  ereturn(escontext, 0,
      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
       errmsg("invalid input syntax for type %s: \"%s\"",
          "smallint", s)));
}

/*
 * Convert input string to a signed 32 bit integer.  Input strings may be
 * expressed in base-10, hexadecimal, octal, or binary format, all of which
 * can be prefixed by an optional sign character, either '+' (the default) or
 * '-' for negative numbers.  Hex strings are recognized by the digits being
 * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
 * prefix.  The binary representation is recognized by the 0b or 0B prefix.
 *
 * Allows any number of leading or trailing whitespace characters.  Digits may
 * optionally be separated by a single underscore character.  These can only
 * come between digits and not before or after the digits.  Underscores have
 * no effect on the return value and are supported only to assist in improving
 * the human readability of the input strings.
 *
 * pg_strtoint32() will throw ereport() upon bad input format or overflow;
 * while pg_strtoint32_safe() instead returns such complaints in *escontext,
 * if it's an ErrorSaveContext.
 *
 * NB: Accumulate input as an unsigned number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
 * signed positive number.
 */
int32
pg_strtoint32(const char *s)
{
  return pg_strtoint32_safe(s, NULL);
}

int32
pg_strtoint32_safe(const char *s, Node *escontext)
{
  const char *ptr = s;
  const char *firstdigit;
  uint32    tmp = 0;
  bool    neg = false;
  unsigned char digit;
  int32   result;

  /*
   * The majority of cases are likely to be base-10 digits without any
   * underscore separator characters.  We'll first try to parse the string
   * with the assumption that's the case and only fallback on a slower
   * implementation which handles hex, octal and binary strings and
   * underscores if the fastpath version cannot parse the string.
   */

  /* leave it up to the slow path to look for leading spaces */

  if (*ptr == '-')
  {
    ptr++;
    neg = true;
  }

  /* a leading '+' is uncommon so leave that for the slow path */

  /* process the first digit */
  digit = (*ptr - '0');

  /*
   * Exploit unsigned arithmetic to save having to check both the upper and
   * lower bounds of the digit.
   */
  if (likely(digit < 10))
  {
    ptr++;
    tmp = digit;
  }
  else
  {
    /* we need at least one digit */
    goto slow;
  }

  /* process remaining digits */
  for (;;)
  {
    digit = (*ptr - '0');

    if (digit >= 10)
      break;

    ptr++;

    if (unlikely(tmp > -(PG_INT32_MIN / 10)))
      goto out_of_range;

    tmp = tmp * 10 + digit;
  }

  /* when the string does not end in a digit, let the slow path handle it */
  if (unlikely(*ptr != '\0'))
    goto slow;

  if (neg)
  {
    if (unlikely(pg_neg_u32_overflow(tmp, &result)))
      goto out_of_range;
    return result;
  }

  if (unlikely(tmp > PG_INT32_MAX))
    goto out_of_range;

  return (int32) tmp;

slow:
  tmp = 0;
  ptr = s;
  /* no need to reset neg */

  /* skip leading spaces */
  while (isspace((unsigned char) *ptr))
    ptr++;

  /* handle sign */
  if (*ptr == '-')
  {
    ptr++;
    neg = true;
  }
  else if (*ptr == '+')
    ptr++;

  /* process digits */
  if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (isxdigit((unsigned char) *ptr))
      {
        if (unlikely(tmp > -(PG_INT32_MIN / 16)))
          goto out_of_range;

        tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '7')
      {
        if (unlikely(tmp > -(PG_INT32_MIN / 8)))
          goto out_of_range;

        tmp = tmp * 8 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '1')
      {
        if (unlikely(tmp > -(PG_INT32_MIN / 2)))
          goto out_of_range;

        tmp = tmp * 2 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else
  {
    firstdigit = ptr;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '9')
      {
        if (unlikely(tmp > -(PG_INT32_MIN / 10)))
          goto out_of_range;

        tmp = tmp * 10 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore may not be first */
        if (unlikely(ptr == firstdigit))
          goto invalid_syntax;
        /* and it must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
          goto invalid_syntax;
      }
      else
        break;
    }
  }

  /* require at least one digit */
  if (unlikely(ptr == firstdigit))
    goto invalid_syntax;

  /* allow trailing whitespace, but not other trailing chars */
  while (isspace((unsigned char) *ptr))
    ptr++;

  if (unlikely(*ptr != '\0'))
    goto invalid_syntax;

  if (neg)
  {
    if (unlikely(pg_neg_u32_overflow(tmp, &result)))
      goto out_of_range;
    return result;
  }

  if (tmp > PG_INT32_MAX)
    goto out_of_range;

  return (int32) tmp;

out_of_range:
  ereturn(escontext, 0,
      (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
       errmsg("value \"%s\" is out of range for type %s",
          s, "integer")));

invalid_syntax:
  ereturn(escontext, 0,
      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
       errmsg("invalid input syntax for type %s: \"%s\"",
          "integer", s)));
}

/*
 * Convert input string to a signed 64 bit integer.  Input strings may be
 * expressed in base-10, hexadecimal, octal, or binary format, all of which
 * can be prefixed by an optional sign character, either '+' (the default) or
 * '-' for negative numbers.  Hex strings are recognized by the digits being
 * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
 * prefix.  The binary representation is recognized by the 0b or 0B prefix.
 *
 * Allows any number of leading or trailing whitespace characters.  Digits may
 * optionally be separated by a single underscore character.  These can only
 * come between digits and not before or after the digits.  Underscores have
 * no effect on the return value and are supported only to assist in improving
 * the human readability of the input strings.
 *
 * pg_strtoint64() will throw ereport() upon bad input format or overflow;
 * while pg_strtoint64_safe() instead returns such complaints in *escontext,
 * if it's an ErrorSaveContext.
 *
 * NB: Accumulate input as an unsigned number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
 * signed positive number.
 */
int64
pg_strtoint64(const char *s)
{
  return pg_strtoint64_safe(s, NULL);
}

int64
pg_strtoint64_safe(const char *s, Node *escontext)
{
  const char *ptr = s;
  const char *firstdigit;
  uint64    tmp = 0;
  bool    neg = false;
  unsigned char digit;
  int64   result;

  /*
   * The majority of cases are likely to be base-10 digits without any
   * underscore separator characters.  We'll first try to parse the string
   * with the assumption that's the case and only fallback on a slower
   * implementation which handles hex, octal and binary strings and
   * underscores if the fastpath version cannot parse the string.
   */

  /* leave it up to the slow path to look for leading spaces */

  if (*ptr == '-')
  {
    ptr++;
    neg = true;
  }

  /* a leading '+' is uncommon so leave that for the slow path */

  /* process the first digit */
  digit = (*ptr - '0');

  /*
   * Exploit unsigned arithmetic to save having to check both the upper and
   * lower bounds of the digit.
   */
  if (likely(digit < 10))
  {
    ptr++;
    tmp = digit;
  }
  else
  {
    /* we need at least one digit */
    goto slow;
  }

  /* process remaining digits */
  for (;;)
  {
    digit = (*ptr - '0');

    if (digit >= 10)
      break;

    ptr++;

    if (unlikely(tmp > -(PG_INT64_MIN / 10)))
      goto out_of_range;

    tmp = tmp * 10 + digit;
  }

  /* when the string does not end in a digit, let the slow path handle it */
  if (unlikely(*ptr != '\0'))
    goto slow;

  if (neg)
  {
    if (unlikely(pg_neg_u64_overflow(tmp, &result)))
      goto out_of_range;
    return result;
  }

  if (unlikely(tmp > PG_INT64_MAX))
    goto out_of_range;

  return (int64) tmp;

slow:
  tmp = 0;
  ptr = s;
  /* no need to reset neg */

  /* skip leading spaces */
  while (isspace((unsigned char) *ptr))
    ptr++;

  /* handle sign */
  if (*ptr == '-')
  {
    ptr++;
    neg = true;
  }
  else if (*ptr == '+')
    ptr++;

  /* process digits */
  if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (isxdigit((unsigned char) *ptr))
      {
        if (unlikely(tmp > -(PG_INT64_MIN / 16)))
          goto out_of_range;

        tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || !isxdigit((unsigned char) *ptr))
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '7')
      {
        if (unlikely(tmp > -(PG_INT64_MIN / 8)))
          goto out_of_range;

        tmp = tmp * 8 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || *ptr < '0' || *ptr > '7')
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
  {
    firstdigit = ptr += 2;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '1')
      {
        if (unlikely(tmp > -(PG_INT64_MIN / 2)))
          goto out_of_range;

        tmp = tmp * 2 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || *ptr < '0' || *ptr > '1')
          goto invalid_syntax;
      }
      else
        break;
    }
  }
  else
  {
    firstdigit = ptr;

    for (;;)
    {
      if (*ptr >= '0' && *ptr <= '9')
      {
        if (unlikely(tmp > -(PG_INT64_MIN / 10)))
          goto out_of_range;

        tmp = tmp * 10 + (*ptr++ - '0');
      }
      else if (*ptr == '_')
      {
        /* underscore may not be first */
        if (unlikely(ptr == firstdigit))
          goto invalid_syntax;
        /* and it must be followed by more digits */
        ptr++;
        if (*ptr == '\0' || !isdigit((unsigned char) *ptr))
          goto invalid_syntax;
      }
      else
        break;
    }
  }

  /* require at least one digit */
  if (unlikely(ptr == firstdigit))
    goto invalid_syntax;

  /* allow trailing whitespace, but not other trailing chars */
  while (isspace((unsigned char) *ptr))
    ptr++;

  if (unlikely(*ptr != '\0'))
    goto invalid_syntax;

  if (neg)
  {
    if (unlikely(pg_neg_u64_overflow(tmp, &result)))
      goto out_of_range;
    return result;
  }

  if (tmp > PG_INT64_MAX)
    goto out_of_range;

  return (int64) tmp;

out_of_range:
  ereturn(escontext, 0,
      (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
       errmsg("value \"%s\" is out of range for type %s",
          s, "bigint")));

invalid_syntax:
  ereturn(escontext, 0,
      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
       errmsg("invalid input syntax for type %s: \"%s\"",
          "bigint", s)));
}

/*
 * Convert input string to an unsigned 32 bit integer.
 *
 * Allows any number of leading or trailing whitespace characters.
 *
 * If endloc isn't NULL, store a pointer to the rest of the string there,
 * so that caller can parse the rest.  Otherwise, it's an error if anything
 * but whitespace follows.
 *
 * typname is what is reported in error messages.
 *
 * If escontext points to an ErrorSaveContext node, that is filled instead
 * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
 * to detect errors.
 */
uint32
uint32in_subr(const char *s, char **endloc,
        const char *typname, Node *escontext)
{
  uint32    result;
  unsigned long cvt;
  char     *endptr;

  errno = 0;
  cvt = strtoul(s, &endptr, 0);

  /*
   * strtoul() normally only sets ERANGE.  On some systems it may also set
   * EINVAL, which simply means it couldn't parse the input string.  Be sure
   * to report that the same way as the standard error indication (that
   * endptr == s).
   */
  if ((errno && errno != ERANGE) || endptr == s)
    ereturn(escontext, 0,
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
         errmsg("invalid input syntax for type %s: \"%s\"",
            typname, s)));

  if (errno == ERANGE)
    ereturn(escontext, 0,
        (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
         errmsg("value \"%s\" is out of range for type %s",
            s, typname)));

  if (endloc)
  {
    /* caller wants to deal with rest of string */
    *endloc = endptr;
  }
  else
  {
    /* allow only whitespace after number */
    while (*endptr && isspace((unsigned char) *endptr))
      endptr++;
    if (*endptr)
      ereturn(escontext, 0,
          (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
           errmsg("invalid input syntax for type %s: \"%s\"",
              typname, s)));
  }

  result = (uint32) cvt;

  /*
   * Cope with possibility that unsigned long is wider than uint32, in which
   * case strtoul will not raise an error for some values that are out of
   * the range of uint32.
   *
   * For backwards compatibility, we want to accept inputs that are given
   * with a minus sign, so allow the input value if it matches after either
   * signed or unsigned extension to long.
   *
   * To ensure consistent results on 32-bit and 64-bit platforms, make sure
   * the error message is the same as if strtoul() had returned ERANGE.
   */
#if PG_UINT32_MAX != ULONG_MAX
  if (cvt != (unsigned long) result &&
    cvt != (unsigned long) ((int) result))
    ereturn(escontext, 0,
        (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
         errmsg("value \"%s\" is out of range for type %s",
            s, typname)));
#endif

  return result;
}

/*
 * Convert input string to an unsigned 64 bit integer.
 *
 * Allows any number of leading or trailing whitespace characters.
 *
 * If endloc isn't NULL, store a pointer to the rest of the string there,
 * so that caller can parse the rest.  Otherwise, it's an error if anything
 * but whitespace follows.
 *
 * typname is what is reported in error messages.
 *
 * If escontext points to an ErrorSaveContext node, that is filled instead
 * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
 * to detect errors.
 */
uint64
uint64in_subr(const char *s, char **endloc,
        const char *typname, Node *escontext)
{
  uint64    result;
  char     *endptr;

  errno = 0;
  result = strtou64(s, &endptr, 0);

  /*
   * strtoul[l] normally only sets ERANGE.  On some systems it may also set
   * EINVAL, which simply means it couldn't parse the input string.  Be sure
   * to report that the same way as the standard error indication (that
   * endptr == s).
   */
  if ((errno && errno != ERANGE) || endptr == s)
    ereturn(escontext, 0,
        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
         errmsg("invalid input syntax for type %s: \"%s\"",
            typname, s)));

  if (errno == ERANGE)
    ereturn(escontext, 0,
        (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
         errmsg("value \"%s\" is out of range for type %s",
            s, typname)));

  if (endloc)
  {
    /* caller wants to deal with rest of string */
    *endloc = endptr;
  }
  else
  {
    /* allow only whitespace after number */
    while (*endptr && isspace((unsigned char) *endptr))
      endptr++;
    if (*endptr)
      ereturn(escontext, 0,
          (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
           errmsg("invalid input syntax for type %s: \"%s\"",
              typname, s)));
  }

  return result;
}

/*
 * pg_itoa: converts a signed 16-bit integer to its string representation
 * and returns strlen(a).
 *
 * Caller must ensure that 'a' points to enough memory to hold the result
 * (at least 7 bytes, counting a leading sign and trailing NUL).
 *
 * It doesn't seem worth implementing this separately.
 */
int
pg_itoa(int16 i, char *a)
{
  return pg_ltoa((int32) i, a);
}

/*
 * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
 * not NUL-terminated, and returns the length of that string representation
 *
 * Caller must ensure that 'a' points to enough memory to hold the result (at
 * least 10 bytes)
 */
int
pg_ultoa_n(uint32 value, char *a)
{
  int     olength,
        i = 0;

  /* Degenerate case */
  if (value == 0)
  {
    *a = '0';
    return 1;
  }

  olength = decimalLength32(value);

  /* Compute the result string. */
  while (value >= 10000)
  {
    const uint32 c = value - 10000 * (value / 10000);
    const uint32 c0 = (c % 100) << 1;
    const uint32 c1 = (c / 100) << 1;

    char     *pos = a + olength - i;

    value /= 10000;

    memcpy(pos - 2, DIGIT_TABLE + c0, 2);
    memcpy(pos - 4, DIGIT_TABLE + c1, 2);
    i += 4;
  }
  if (value >= 100)
  {
    const uint32 c = (value % 100) << 1;

    char     *pos = a + olength - i;

    value /= 100;

    memcpy(pos - 2, DIGIT_TABLE + c, 2);
    i += 2;
  }
  if (value >= 10)
  {
    const uint32 c = value << 1;

    char     *pos = a + olength - i;

    memcpy(pos - 2, DIGIT_TABLE + c, 2);
  }
  else
  {
    *a = (char) ('0' + value);
  }

  return olength;
}

/*
 * pg_ltoa: converts a signed 32-bit integer to its string representation and
 * returns strlen(a).
 *
 * It is the caller's responsibility to ensure that a is at least 12 bytes long,
 * which is enough room to hold a minus sign, a maximally long int32, and the
 * above terminating NUL.
 */
int
pg_ltoa(int32 value, char *a)
{
  uint32    uvalue = (uint32) value;
  int     len = 0;

  if (value < 0)
  {
    uvalue = (uint32) 0 - uvalue;
    a[len++] = '-';
  }
  len += pg_ultoa_n(uvalue, a + len);
  a[len] = '\0';
  return len;
}

/*
 * Get the decimal representation, not NUL-terminated, and return the length of
 * same.  Caller must ensure that a points to at least MAXINT8LEN bytes.
 */
int
pg_ulltoa_n(uint64 value, char *a)
{
  int     olength,
        i = 0;
  uint32    value2;

  /* Degenerate case */
  if (value == 0)
  {
    *a = '0';
    return 1;
  }

  olength = decimalLength64(value);

  /* Compute the result string. */
  while (value >= 100000000)
  {
    const uint64 q = value / 100000000;
    uint32    value3 = (uint32) (value - 100000000 * q);

    const uint32 c = value3 % 10000;
    const uint32 d = value3 / 10000;
    const uint32 c0 = (c % 100) << 1;
    const uint32 c1 = (c / 100) << 1;
    const uint32 d0 = (d % 100) << 1;
    const uint32 d1 = (d / 100) << 1;

    char     *pos = a + olength - i;

    value = q;

    memcpy(pos - 2, DIGIT_TABLE + c0, 2);
    memcpy(pos - 4, DIGIT_TABLE + c1, 2);
    memcpy(pos - 6, DIGIT_TABLE + d0, 2);
    memcpy(pos - 8, DIGIT_TABLE + d1, 2);
    i += 8;
  }

  /* Switch to 32-bit for speed */
  value2 = (uint32) value;

  if (value2 >= 10000)
  {
    const uint32 c = value2 - 10000 * (value2 / 10000);
    const uint32 c0 = (c % 100) << 1;
    const uint32 c1 = (c / 100) << 1;

    char     *pos = a + olength - i;

    value2 /= 10000;

    memcpy(pos - 2, DIGIT_TABLE + c0, 2);
    memcpy(pos - 4, DIGIT_TABLE + c1, 2);
    i += 4;
  }
  if (value2 >= 100)
  {
    const uint32 c = (value2 % 100) << 1;
    char     *pos = a + olength - i;

    value2 /= 100;

    memcpy(pos - 2, DIGIT_TABLE + c, 2);
    i += 2;
  }
  if (value2 >= 10)
  {
    const uint32 c = value2 << 1;
    char     *pos = a + olength - i;

    memcpy(pos - 2, DIGIT_TABLE + c, 2);
  }
  else
    *a = (char) ('0' + value2);

  return olength;
}

/*
 * pg_lltoa: converts a signed 64-bit integer to its string representation and
 * returns strlen(a).
 *
 * Caller must ensure that 'a' points to enough memory to hold the result
 * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
 */
int
pg_lltoa(int64 value, char *a)
{
  uint64    uvalue = value;
  int     len = 0;

  if (value < 0)
  {
    uvalue = (uint64) 0 - uvalue;
    a[len++] = '-';
  }

  len += pg_ulltoa_n(uvalue, a + len);
  a[len] = '\0';
  return len;
}


/*
 * pg_ultostr_zeropad
 *    Converts 'value' into a decimal string representation stored at 'str'.
 *    'minwidth' specifies the minimum width of the result; any extra space
 *    is filled up by prefixing the number with zeros.
 *
 * Returns the ending address of the string result (the last character written
 * plus 1).  Note that no NUL terminator is written.
 *
 * The intended use-case for this function is to build strings that contain
 * multiple individual numbers, for example:
 *
 *  str = pg_ultostr_zeropad(str, hours, 2);
 *  *str++ = ':';
 *  str = pg_ultostr_zeropad(str, mins, 2);
 *  *str++ = ':';
 *  str = pg_ultostr_zeropad(str, secs, 2);
 *  *str = '\0';
 *
 * Note: Caller must ensure that 'str' points to enough memory to hold the
 * result.
 */
char *
pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
{
  int     len;

  Assert(minwidth > 0);

  if (value < 100 && minwidth == 2) /* Short cut for common case */
  {
    memcpy(str, DIGIT_TABLE + value * 2, 2);
    return str + 2;
  }

  len = pg_ultoa_n(value, str);
  if (len >= minwidth)
    return str + len;

  memmove(str + minwidth - len, str, len);
  memset(str, '0', minwidth - len);
  return str + minwidth;
}

/*
 * pg_ultostr
 *    Converts 'value' into a decimal string representation stored at 'str'.
 *
 * Returns the ending address of the string result (the last character written
 * plus 1).  Note that no NUL terminator is written.
 *
 * The intended use-case for this function is to build strings that contain
 * multiple individual numbers, for example:
 *
 *  str = pg_ultostr(str, a);
 *  *str++ = ' ';
 *  str = pg_ultostr(str, b);
 *  *str = '\0';
 *
 * Note: Caller must ensure that 'str' points to enough memory to hold the
 * result.
 */
char *
pg_ultostr(char *str, uint32 value)
{
  int     len = pg_ultoa_n(value, str);

  return str + len;
}

Coverage Report

Created: 2025-09-27 06:52