JsonParserBase.java
package tools.jackson.core.json;
import java.math.BigInteger;
import tools.jackson.core.*;
import tools.jackson.core.base.ParserBase;
import tools.jackson.core.exc.InputCoercionException;
import tools.jackson.core.exc.StreamReadException;
import tools.jackson.core.io.CharTypes;
import tools.jackson.core.io.IOContext;
import tools.jackson.core.io.NumberInput;
import tools.jackson.core.util.JacksonFeatureSet;
/**
* Another intermediate base class, only used by actual JSON-backed parser
* implementations.
*
* @since 3.0
*/
public abstract class JsonParserBase
extends ParserBase
{
// Shared zero-length array: initial value of `_nameCopyBuffer`, so that the
// length check in `currentNameInBuffer()` needs no separate null handling.
private final static char[] NO_CHARS = new char[0];
/*
/**********************************************************************
/* JSON-specific configuration
/**********************************************************************
*/
/**
* Bit flags for {@link JsonReadFeature}s that are enabled.
*/
protected int _formatReadFeatures;
/*
/**********************************************************************
/* Parsing state
/**********************************************************************
*/
/**
* Information about parser context, context in which
* the next token is to be parsed (root, array, object).
*/
protected JsonReadContext _streamReadContext;
/**
* Secondary token related to the next token after current one;
* used if its type is known. This may be value token that
* follows {@link JsonToken#PROPERTY_NAME}, for example.
*/
protected JsonToken _nextToken;
/**
* Marker for integer values read using JSON5 hexadecimal notation
* ({@code 0x} / {@code 0X} prefix), enabled via
* {@link JsonReadFeature#ALLOW_HEXADECIMAL_NUMBERS}.
* When {@code true}, the textual representation buffered for the current
* token is the original hex literal (including any sign and the
* {@code 0x}/{@code 0X} prefix) and {@link #_intLength} records the
* number of hexadecimal digits (excluding sign and prefix).
*<p>
* Set by {@link #resetIntHex} and cleared by {@link #resetInt}.
*
* @since 3.2
*/
protected boolean _numberIsHex;
/*
/**********************************************************************
/* Helper buffer recycling
/**********************************************************************
*/
/**
* Temporary buffer that is needed if an Object property name is accessed
* using {@link #getTextCharacters} method (instead of String
* returning alternatives). Starts out empty and is replaced by a larger
* array in {@link #currentNameInBuffer()} when the name does not fit.
*/
private char[] _nameCopyBuffer = NO_CHARS;
/**
* Flag set to indicate whether the Object property name is available
* from the name copy buffer or not (in addition to its String
* representation being available via read context)
*/
protected boolean _nameCopied;
/**
* Lazily-allocated intermediate buffer used by {@code _streamString()}
* implementations to batch writes to the target {@link java.io.Writer}.
* Allocated on first call and reused on subsequent calls to avoid
* repeated allocation for parsers that call {@code readString(Writer)}
* multiple times.
*
* @since 3.1
*/
private char[] _streamStringBuffer;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Sets up the JSON-specific parser state: records the enabled format
 * features and creates the root read context, attaching a duplicate
 * detector only if {@link StreamReadFeature#STRICT_DUPLICATE_DETECTION}
 * is enabled.
 *
 * @param readCtxt Object read context passed through to the base class
 * @param ctxt I/O context passed through to the base class
 * @param streamReadFeatures Bit flags of enabled {@link StreamReadFeature}s
 * @param formatReadFeatures Bit flags of enabled {@link JsonReadFeature}s
 */
protected JsonParserBase(ObjectReadContext readCtxt,
        IOContext ctxt, int streamReadFeatures, int formatReadFeatures)
{
    super(readCtxt, ctxt, streamReadFeatures);
    _formatReadFeatures = formatReadFeatures;

    // Duplicate detection is opt-in: without it the root context gets `null`
    final DupDetector dupDetector;
    if (StreamReadFeature.STRICT_DUPLICATE_DETECTION.enabledIn(streamReadFeatures)) {
        dupDetector = DupDetector.rootDetector(this);
    } else {
        dupDetector = null;
    }
    _streamReadContext = JsonReadContext.createRootContext(dupDetector);
}
/*
/**********************************************************************
/* Versioned, capabilities, config
/**********************************************************************
*/
/**
 * @return Version of this package, as exposed by {@code PackageVersion.VERSION}
 */
@Override
public Version version() {
    return PackageVersion.VERSION;
}
/**
 * @return Read capabilities of this parser; currently the shared
 *   {@code DEFAULT_READ_CAPABILITIES} set
 */
@Override
public JacksonFeatureSet<StreamReadCapability> streamReadCapabilities() {
    // JSON does not (yet) deviate from the general defaults
    final JacksonFeatureSet<StreamReadCapability> capabilities = DEFAULT_READ_CAPABILITIES;
    return capabilities;
}
/*
/**********************************************************************
/* ParserBase method implementations/overrides
/**********************************************************************
*/
/**
 * @return The JSON read context currently tracked by this parser
 */
@Override
public TokenStreamContext streamReadContext() {
    return _streamReadContext;
}
/**
 * @return Value object assigned to the current read context, as reported
 *   by the context itself
 */
@Override
public Object currentValue() {
    final JsonReadContext ctxt = _streamReadContext;
    return ctxt.currentValue();
}
/**
 * Assigns given value object to the current read context.
 *
 * @param v Value to hand over to the context
 */
@Override
public void assignCurrentValue(Object v) {
    final JsonReadContext ctxt = _streamReadContext;
    ctxt.assignCurrentValue(v);
}
/**
 * Accessor for the name associated with the current event.
 *<p>
 * For {@code START_OBJECT} / {@code START_ARRAY} tokens the name is taken
 * from the parent context (when there is one); for everything else it
 * comes from the current context.
 *
 * @return Name associated with the current token, as reported by the
 *   relevant read context
 */
@Override
public String currentName() {
    // [JACKSON-395]: start markers require information from parent
    final boolean atStartMarker = (_currToken == JsonToken.START_OBJECT)
            || (_currToken == JsonToken.START_ARRAY);
    if (atStartMarker) {
        final JsonReadContext enclosing = _streamReadContext.getParent();
        if (enclosing != null) {
            return enclosing.currentName();
        }
    }
    return _streamReadContext.currentName();
}
/**
 * @return {@code true} for String values; for property names only if the
 *   name has been copied into the name copy buffer; {@code false} for any
 *   other token
 */
@Override
public boolean hasStringCharacters() {
    final JsonToken t = _currToken;
    if (t == JsonToken.PROPERTY_NAME) {
        // only available as characters once copied into the buffer
        return _nameCopied;
    }
    // usually true
    return (t == JsonToken.VALUE_STRING);
}
// 03-Nov-2019, tatu: Will not recycle "name copy buffer" any more as it seems
// unlikely to be of much real benefit
/*
@Override
protected void _releaseBuffers() {
super._releaseBuffers();
char[] buf = _nameCopyBuffer;
if (buf != null) {
_nameCopyBuffer = null;
_ioContext.releaseNameCopyBuffer(buf);
}
}
*/
/*
/**********************************************************************
/* Internal/package methods: Context handling
/**********************************************************************
*/
/**
 * Pushes a new child Array context and validates the resulting nesting
 * depth against the configured stream read constraints.
 *
 * @param lineNr Line number to record for the new context
 * @param colNr Column number to record for the new context
 * @throws JacksonException if the nesting depth validation fails
 */
protected void createChildArrayContext(final int lineNr, final int colNr) throws JacksonException {
    final JsonReadContext child = _streamReadContext.createChildArrayContext(lineNr, colNr);
    // Context is switched before validation, so it is in place even on failure
    _streamReadContext = child;
    _streamReadConstraints.validateNestingDepth(child.getNestingDepth());
}
/**
 * Pushes a new child Object context and validates the resulting nesting
 * depth against the configured stream read constraints.
 *
 * @param lineNr Line number to record for the new context
 * @param colNr Column number to record for the new context
 * @throws JacksonException if the nesting depth validation fails
 */
protected void createChildObjectContext(final int lineNr, final int colNr) throws JacksonException {
    final JsonReadContext child = _streamReadContext.createChildObjectContext(lineNr, colNr);
    // Context is switched before validation, so it is in place even on failure
    _streamReadContext = child;
    _streamReadConstraints.validateNestingDepth(child.getNestingDepth());
}
/*
/**********************************************************************
/* Numeric parsing method implementations
/**********************************************************************
*/
/**
 * Overridden to also clear the JSON-only {@code _numberIsHex} flag, so that
 * a regular integer following a hex literal is not mis-decoded as hex.
 * Hex literals go through {@link #resetIntHex} instead, which sets the flag.
 */
@Override
protected JsonToken resetInt(boolean negative, int intLen)
    throws JacksonException
{
    // Clear before delegating, so the flag is reset even if the base call fails
    _numberIsHex = false;
    final JsonToken token = super.resetInt(negative, intLen);
    return token;
}
/**
 * Counterpart of {@link #resetInt} for integers written in JSON5
 * hexadecimal notation ({@code 0x...}).
 *<p>
 * The caller is expected to have buffered the original literal, sign and
 * {@code 0x}/{@code 0X} prefix included, as the textual representation.
 *
 * @param negative Whether the literal had a leading minus sign
 * @param hexDigitLen Number of hexadecimal digits, not counting sign
 *   and prefix
 * @return Always {@link JsonToken#VALUE_NUMBER_INT}
 * @throws JacksonException if the digit count fails integer-length validation
 *
 * @since 3.2
 */
protected final JsonToken resetIntHex(boolean negative, int hexDigitLen)
    throws JacksonException
{
    // Validate before touching any state; may throw StreamConstraintsException
    _streamReadConstraints.validateIntegerLength(hexDigitLen);

    // Shape of the number: integral, no fraction, no exponent
    _intLength = hexDigitLen;
    _fractLength = 0;
    _expLength = 0;

    // Flags describing the literal
    _numberIsHex = true;
    _numberIsNaN = false;
    _numberNegative = negative;

    // Nothing decoded yet: force decoding on first numeric access
    _numberString = null;
    _numTypesValid = NR_UNKNOWN;
    return JsonToken.VALUE_NUMBER_INT;
}
/**
 * Decodes the current numeric token into the most compact internal
 * representation that can hold it.
 *<p>
 * Floating-point tokens are handed to {@code _parseSlowFloat}; hex integers
 * to {@code _parseHexInt}. Decimal integers are decoded by digit count:
 * up to 9 digits as {@code int}, up to 18 as {@code long} (narrowed to
 * {@code int} when a 10-digit value is in range), 19 digits as {@code long}
 * when in range, and everything else via {@code _parseSlowInt}.
 *
 * @param expType Numeric type the caller is after (one of {@code NR_xxx})
 * @throws JacksonException if current token is not numeric, or decoding fails
 */
@Override
protected void _parseNumericValue(int expType)
    throws JacksonException, InputCoercionException
{
    if (_currToken == JsonToken.VALUE_NUMBER_FLOAT) {
        _parseSlowFloat(expType);
        return;
    }
    if (_currToken != JsonToken.VALUE_NUMBER_INT) {
        throw _constructNotNumericType(_currToken, expType);
    }

    // Integral value from here on
    if (_numberIsHex) {
        _parseHexInt(expType);
        return;
    }
    final int digitCount = _intLength;

    // Up to 9 digits: always fits in int
    if (digitCount <= 9) {
        _numberInt = _textBuffer.contentsAsInt(_numberNegative);
        _numTypesValid = NR_INT;
        return;
    }
    // Up to 18 digits: definitely fits in long AND is easy to parse
    if (digitCount <= 18) {
        final long value = _textBuffer.contentsAsLong(_numberNegative);
        // 10-digit values might still fit in int, need to check
        final boolean fitsInt = (digitCount == 10)
                && (_numberNegative ? (value >= MIN_INT_L) : (value <= MAX_INT_L));
        if (fitsInt) {
            _numberInt = (int) value;
            _numTypesValid = NR_INT;
        } else {
            _numberLong = value;
            _numTypesValid = NR_LONG;
        }
        return;
    }
    // For [core#865]: handle remaining 19-char cases as well
    if (digitCount == 19) {
        final char[] chars = _textBuffer.getTextBuffer();
        int start = _textBuffer.getTextOffset();
        if (_numberNegative) {
            // skip the sign
            ++start;
        }
        if (NumberInput.inLongRange(chars, start, digitCount, _numberNegative)) {
            _numberLong = NumberInput.parseLong19(chars, start, _numberNegative);
            _numTypesValid = NR_LONG;
            return;
        }
    }
    _parseSlowInt(expType);
}
/**
 * Inlined variant of {@code _parseNumericValue(NR_INT)}: short decimal
 * integers are decoded directly, everything else goes through the generic
 * path followed by conversion to {@code int} if needed.
 *
 * @return Current numeric value as {@code int}
 * @throws JacksonException if decoding or conversion fails
 */
@Override
protected int _parseIntValue() throws JacksonException
{
    // Hex integers must take the generic path so the base-16 decode is
    // applied (the base-10 shortcut here would mis-read the literal)
    final boolean shortDecimalInt = (_currToken == JsonToken.VALUE_NUMBER_INT)
            && !_numberIsHex
            && (_intLength <= 9);
    if (shortDecimalInt) {
        final int value = _textBuffer.contentsAsInt(_numberNegative);
        _numberInt = value;
        _numTypesValid = NR_INT;
        return value;
    }
    // not optimizable: use the more generic decoder
    _parseNumericValue(NR_INT);
    final boolean haveInt = (_numTypesValid & NR_INT) != 0;
    if (!haveInt) {
        convertNumberToInt();
    }
    return _numberInt;
}
/**
 * Decodes a floating-point token into the representation matching
 * {@code expType}.
 *<p>
 * The choice is between accurate but slow (BigDecimal) and lossy but fast
 * (double / float); BigDecimal is only used when explicitly requested,
 * which is fine since the textual representation is retained.
 *
 * @param expType Requested numeric type (one of {@code NR_xxx})
 * @throws JacksonException if the textual value cannot be decoded
 */
private void _parseSlowFloat(int expType) throws JacksonException
{
    if (expType == NR_BIGDECIMAL) {
        // 24-Jun-2024, tatu: decode eagerly; no deferring unless specifically
        //   requested w/ `getNumberValueDeferred()` or so
        final boolean fastBigParser = isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER);
        _numberBigDecimal = _textBuffer.contentsAsDecimal(fastBigParser);
        _numTypesValid = NR_BIGDECIMAL;
        return;
    }
    if (expType == NR_DOUBLE) {
        final boolean fastDoubleParser = isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER);
        _numberDouble = _textBuffer.contentsAsDouble(fastDoubleParser);
        _numTypesValid = NR_DOUBLE;
        return;
    }
    if (expType == NR_FLOAT) {
        final boolean fastDoubleParser = isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER);
        _numberFloat = _textBuffer.contentsAsFloat(fastDoubleParser);
        _numTypesValid = NR_FLOAT;
        return;
    }
    // NR_UNKNOWN, or one of int types
    // 04-Dec-2022, tatu: We can get all kinds of values here
    //    (NR_INT, NR_LONG or even NR_UNKNOWN). Should we try further
    //    deferring some typing?
    // Only the text is retained here (presumably for later, lazy decoding)
    _numberDouble = 0.0;
    _numberString = _textBuffer.contentsAsString();
    _numTypesValid = NR_DOUBLE;
}
/**
 * Decode a JSON5 hexadecimal integer that was buffered as the original
 * textual literal (optional sign + {@code 0x}/{@code 0X} prefix + hex digits).
 * {@link #_intLength} holds the count of hex digits.
 *<p>
 * Resulting representation, by digit count:
 *<ul>
 * <li>1..7 digits: {@code int}</li>
 * <li>8..15 digits: {@code long} (8 digits may exceed the signed int range,
 *   so no narrowing is attempted here)</li>
 * <li>16 digits with top nibble below 8: {@code long}</li>
 * <li>anything else: decoded as {@link BigInteger}; stored as {@code long}
 *   if the signed value still fits (for example {@code -0x8000000000000000},
 *   which is {@code Long.MIN_VALUE}), otherwise kept as {@link BigInteger}</li>
 *</ul>
 *
 * @param expType Numeric type the caller is after (one of {@code NR_xxx});
 *   only matters for values that do not fit in {@code long}
 * @throws JacksonException if the value does not fit in {@code long} and
 *   {@code expType} is {@code NR_INT} or {@code NR_LONG}
 *
 * @since 3.2
 */
private void _parseHexInt(int expType) throws JacksonException
{
    final int hexLen = _intLength;
    final char[] buf = _textBuffer.getTextBuffer();
    // Locate the first hex digit: skip optional sign and "0x" / "0X" prefix
    int idx = _textBuffer.getTextOffset();
    final char first = buf[idx];
    if (first == '-' || first == '+') {
        ++idx;
    }
    idx += 2; // skip "0x" / "0X"
    // Up to 7 hex digits always fit in a positive signed int (<= 0x0FFFFFFF).
    // 8 hex digits may overflow signed int (e.g. 0x80000000), so we defer to
    // the long path which handles range checks uniformly.
    if (hexLen <= 7) {
        int v = 0;
        for (int i = 0; i < hexLen; ++i) {
            v = (v << 4) | CharTypes.charToHex(buf[idx + i]);
        }
        _numberInt = _numberNegative ? -v : v;
        _numTypesValid = NR_INT;
        return;
    }
    // 8..15 hex digits always fit in a positive long (60 bits used at most)
    if (hexLen <= 15) {
        long v = 0L;
        for (int i = 0; i < hexLen; ++i) {
            v = (v << 4) | CharTypes.charToHex(buf[idx + i]);
        }
        _numberLong = _numberNegative ? -v : v;
        _numTypesValid = NR_LONG;
        return;
    }
    // 16 hex digits: may or may not fit in signed long, depending on top bit
    if (hexLen == 16) {
        int topNibble = CharTypes.charToHex(buf[idx]);
        if (topNibble < 0x8) { // fits in positive signed long
            long v = topNibble;
            for (int i = 1; i < 16; ++i) {
                v = (v << 4) | CharTypes.charToHex(buf[idx + i]);
            }
            _numberLong = _numberNegative ? -v : v;
            _numTypesValid = NR_LONG;
            return;
        }
        // else fall through to BigInteger path
    }
    // Larger values -> BigInteger. We must eagerly decode here (the lazy
    // base-10 path via _numberString would mis-read hex digits). Pass the
    // char[] slice directly so the fast path avoids an intermediate String.
    BigInteger bi = NumberInput.parseBigIntegerWithRadix(buf, idx, hexLen, 16,
            isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
    if (_numberNegative) {
        bi = bi.negate();
    }
    // The signed value may still be representable as a long even though the
    // digit-count shortcuts above could not prove it: most notably
    // -0x8000000000000000 (Long.MIN_VALUE), but also any zero-padded literal
    // with more than 16 digits. `bitLength()` excludes the sign bit, so
    // `<= 63` is exactly "fits in a signed 64-bit long". Treat these like
    // the other long cases instead of reporting a bogus overflow.
    if (bi.bitLength() <= 63) {
        _numberLong = bi.longValue();
        _numTypesValid = NR_LONG;
        return;
    }
    _numberBigInt = bi;
    _numberString = null;
    _numTypesValid = NR_BIGINT;
    if ((expType == NR_INT) || (expType == NR_LONG)) {
        // Value genuinely exceeds long range: surface a meaningful error
        _reportTooLongIntegral(expType, _textBuffer.contentsAsString());
    }
}
/**
 * Builds the standard error message shared by all JSON parser variants for
 * a {@code 0x}/{@code 0X} hex prefix that is not followed by any hex digit.
 *
 * @param prefixChar The prefix character actually seen ({@code 'x'} or {@code 'X'})
 * @return Message naming the offending prefix
 *
 * @since 3.2
 */
protected static String _hexPrefixNotFollowedMessage(char prefixChar) {
    final StringBuilder msg = new StringBuilder("Hexadecimal number prefix '0");
    msg.append(prefixChar);
    msg.append("' must be followed by at least one hex digit (0-9, a-f, A-F)");
    return msg.toString();
}
/**
 * Gate-keeper invoked once the {@code 'x'} / {@code 'X'} following a leading
 * {@code '0'} of a number literal has been seen.
 *<p>
 * Does nothing when {@link JsonReadFeature#ALLOW_HEXADECIMAL_NUMBERS} is
 * enabled; otherwise reports the character as unexpected with a message
 * that names the feature to enable, so the user gets a specific, actionable
 * error instead of a generic "unexpected character".
 *
 * @param prefixChar The {@code 'x'} / {@code 'X'} character that was read
 * @throws StreamReadException if hexadecimal numbers are not enabled
 *
 * @since 3.2
 */
protected void _checkHexNumbersAllowed(int prefixChar) throws StreamReadException {
    if (isEnabled(JsonReadFeature.ALLOW_HEXADECIMAL_NUMBERS)) {
        return;
    }
    _reportUnexpectedChar(prefixChar,
            "hexadecimal number literals require enabling `JsonReadFeature.ALLOW_HEXADECIMAL_NUMBERS`");
}
/**
 * Handles decimal integers that none of the fast paths could decode.
 * Requests for {@code int} / {@code long} are reported as overflow; for
 * other types only the text is retained, tagged as double (for
 * double/float requests) or as BigInteger (for everything else).
 *
 * @param expType Requested numeric type (one of {@code NR_xxx})
 * @throws JacksonException if {@code expType} is {@code NR_INT} or {@code NR_LONG}
 */
private void _parseSlowInt(int expType) throws JacksonException
{
    final String rawText = _textBuffer.contentsAsString();
    // 16-Oct-2018, tatu: Need to catch "too big" early due to [jackson-core#488]
    final boolean wantsIntOrLong = (expType == NR_INT) || (expType == NR_LONG);
    if (wantsIntOrLong) {
        _reportTooLongIntegral(expType, rawText);
    }
    // Either way the textual form is what gets retained
    _numberString = rawText;
    final boolean wantsFloatingPoint = (expType == NR_DOUBLE) || (expType == NR_FLOAT);
    if (wantsFloatingPoint) {
        _numTypesValid = NR_DOUBLE;
        return;
    }
    // nope, need the heavy guns... (rare case) - since Jackson v2.14, BigInteger parsing is lazy
    _numberBigInt = null;
    _numTypesValid = NR_BIGINT;
}
/**
 * Reports an integral value that is too large for the requested type.
 *
 * @param expType Requested numeric type; {@code NR_INT} selects the
 *   int-overflow report
 * @param rawNum Textual form of the offending number, passed to the reporter
 * @throws JacksonException to signal the overflow
 */
protected void _reportTooLongIntegral(int expType, String rawNum) throws JacksonException
{
    final boolean intRequested = (expType == NR_INT);
    if (intRequested) {
        // NOTE(review): presumably never returns normally; if it did,
        // control would continue into the long-overflow report below
        _reportOverflowInt(rawNum);
    }
    _reportOverflowLong(rawNum);
}
/*
/**********************************************************************
/* Internal/package methods: config access
/**********************************************************************
*/
/**
 * Checks whether given JSON-specific read feature is enabled for this parser.
 *
 * @param f Feature to check
 * @return {@code true} if the feature's bit is set in the format read features
 */
public boolean isEnabled(JsonReadFeature f) {
    final int enabledFeatures = _formatReadFeatures;
    return f.enabledIn(enabledFeatures);
}
/*
/**********************************************************************
/* Internal/package methods: buffer handling
/**********************************************************************
*/
/**
 * Gives access to the current property name as characters, copying it from
 * the read context into the name copy buffer on first access.
 *<p>
 * Note that the returned array may be longer than the name itself: the
 * name occupies the leading characters, starting at offset 0.
 *
 * @return Name copy buffer containing the current property name
 */
protected char[] currentNameInBuffer() {
    if (!_nameCopied) {
        final String propName = _streamReadContext.currentName();
        final int propNameLen = propName.length();
        char[] target = _nameCopyBuffer;
        if (target.length < propNameLen) {
            // Grow with a floor of 32 chars
            target = new char[Math.max(32, propNameLen)];
            _nameCopyBuffer = target;
        }
        propName.getChars(0, propNameLen, target, 0);
        _nameCopied = true;
    }
    return _nameCopyBuffer;
}
/**
 * Accessor for the intermediate buffer that {@code _streamString()} uses to
 * batch-write decoded characters to a {@link java.io.Writer}. The buffer
 * (1024 chars) is created on first call and the same instance is returned
 * on every later call.
 *
 * @return Reusable character buffer
 *
 * @since 3.1
 */
protected char[] _bufferForStringStreaming() {
    if (_streamStringBuffer == null) {
        _streamStringBuffer = new char[1024];
    }
    return _streamStringBuffer;
}
/*
/**********************************************************************
/* Internal/package methods: Error reporting
/**********************************************************************
*/
/**
 * Decides what to do with a backslash escape that is not one of the
 * standard JSON escapes.
 *
 * @param ch Character that followed the backslash
 * @return {@code ch} itself, if the escape is tolerated by enabled features
 * @throws StreamReadException if the escape is not allowed
 */
protected char _handleUnrecognizedCharacterEscape(char ch) throws StreamReadException {
    // Tolerated when any character may be escaped, or, since single-quoted
    // names and String values need it regardless, when it is an escaped
    // single quote and single quotes are allowed
    final boolean tolerated = isEnabled(JsonReadFeature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER)
            || ((ch == '\'') && isEnabled(JsonReadFeature.ALLOW_SINGLE_QUOTES));
    if (tolerated) {
        return ch;
    }
    final String msg = "Unrecognized character escape "+_getCharDesc(ch);
    throw _constructReadException(msg, _currentLocationMinusOne());
}
/**
 * Reports a close marker that does not match the currently open scope.
 * (Promoted from {@code ParserBase} in 3.0.)
 *
 * @param actCh Close marker that was actually encountered
 * @param expCh Close marker that the open scope requires
 * @throws StreamReadException to signal the mismatch
 */
protected void _reportMismatchedEndMarker(int actCh, char expCh) throws StreamReadException {
    final TokenStreamContext ctxt = streamReadContext();
    if (!ctxt.inRoot()) {
        final String msg = String.format(
                "Unexpected close marker '%s': expected '%c' (for %s starting at %s)",
                (char) actCh, expCh, ctxt.typeDesc(), ctxt.startLocation(_contentReference()));
        throw _constructReadException(msg, _currentLocationMinusOne());
    }
    // 31-Jan-2025, tatu: [core#1394] Need to check case of no open scope
    _reportExtraEndMarker(actCh);
}
/**
 * Reports a close marker encountered while no scope is open.
 *
 * @param actCh Close marker that was encountered; <code>'}'</code> is
 *   described as closing an Object, anything else as closing an Array
 * @throws StreamReadException always
 */
protected void _reportExtraEndMarker(int actCh) throws StreamReadException {
    final String scopeDesc;
    if (actCh == '}') {
        scopeDesc = "Object";
    } else {
        scopeDesc = "Array";
    }
    throw _constructReadException(
            String.format("Unexpected close marker '%s': no open %s to close", (char) actCh, scopeDesc),
            _currentLocationMinusOne());
}
/**
 * Method called to report a problem with an unquoted control character.
 * The report is suppressed (method returns normally) when
 * {@link JsonReadFeature#ALLOW_UNESCAPED_CONTROL_CHARS} is enabled and the
 * character code is not above {@code INT_SPACE}.
 *
 * @param i Character code that was encountered
 * @param ctxtDesc Description of where the character appeared, appended
 *   to the error message
 * @throws StreamReadException if the character is not tolerated
 */
protected void _throwUnquotedSpace(int i, String ctxtDesc) throws StreamReadException {
    // It is possible to allow unquoted control chars:
    if (isEnabled(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS) && (i <= INT_SPACE)) {
        return;
    }
    final char c = (char) i;
    final String msg = "Illegal unquoted character ("+_getCharDesc(c)+"): has to be escaped using backslash to be included in "+ctxtDesc;
    throw _constructReadException(msg, _currentLocationMinusOne());
}
/**
 * Called when the parser finds something that looks like an unquoted
 * textual token; currently the same text as {@link #_validJsonValueList()}.
 *
 * @return Description to use as "valid tokens" in an exception message
 *   about an invalid (unrecognized) JSON token
 */
protected String _validJsonTokenList() {
    final String validValues = _validJsonValueList();
    return validValues;
}
/**
 * Called when the parser finds something that does not look like a value
 * or separator.
 *
 * @return Description to use as "valid JSON values" in an exception message
 *   about an invalid (unrecognized) JSON value; mentions the non-numeric
 *   number tokens only when they are enabled
 */
protected String _validJsonValueList() {
    return isEnabled(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS)
            ? "(JSON String, Number (or 'NaN'/'+INF'/'-INF'), Array, Object or token 'null', 'true' or 'false')"
            : "(JSON String, Number, Array, Object or token 'null', 'true' or 'false')";
}
/*
/**********************************************************************
/* Internal/package methods: surrogate handling
/**********************************************************************
*/
/**
 * Combines a surrogate pair into a supplementary code point, after checking
 * that {@code lo} really is a low surrogate (DC00-DFFF).
 *
 * @param hi High surrogate of the pair
 * @param lo Candidate low surrogate
 * @return Supplementary code point the pair encodes
 * @throws StreamReadException if {@code lo} is outside the low surrogate range
 *
 * @since 3.1
 */
protected int _decodeSurrogate(int hi, int lo) throws StreamReadException {
    final boolean validLow = (lo >= 0xDC00) && (lo <= 0xDFFF);
    if (!validLow) {
        final String msg = String.format(
                "Broken surrogate pair in property name: expected low surrogate (DC00-DFFF), got %04X", lo);
        _reportError(msg);
    }
    final int highBits = (hi - 0xD800) << 10;
    final int lowBits = lo - 0xDC00;
    return 0x10000 + highBits + lowBits;
}
/**
 * Reports a lone low surrogate, that is, one that was not preceded by a
 * high surrogate.
 *
 * @param ch Low surrogate that was encountered
 * @param <T> Nominal return type, so the call can be used in a
 *   {@code return} statement
 * @return Whatever {@code _reportError} returns
 * @throws StreamReadException to signal the problem
 *
 * @since 3.1
 */
protected <T> T _reportUnexpectedLowSurrogate(int ch) throws StreamReadException {
    final String msg = String.format(
            "Unexpected low surrogate in property name (%04X) without preceding high surrogate", ch);
    return _reportError(msg);
}
/*
/**********************************************************************
/* Internal/package methods: other
/**********************************************************************
*/
/**
 * @param i Character code to check
 * @return {@code true} if {@code i} is {@code INT_RS} and
 *   {@link JsonReadFeature#ALLOW_RS_CONTROL_CHAR} is enabled
 */
protected boolean _isAllowedCtrlCharRS(int i) {
    if (i != INT_RS) {
        return false;
    }
    return JsonReadFeature.ALLOW_RS_CONTROL_CHAR.enabledIn(_formatReadFeatures);
}
}