NonBlockingParserBase.java
package com.fasterxml.jackson.dataformat.smile.async;
import java.io.*;
import java.util.Arrays;
import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.json.JsonReadContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
import com.fasterxml.jackson.core.util.ByteArrayBuilder;
import com.fasterxml.jackson.dataformat.smile.*;
public abstract class NonBlockingParserBase
extends SmileParserBase
{
/*
/**********************************************************************
/* Major state constants
/**********************************************************************
*/
/**
* State right after parser has been constructed, before seeing the first byte
* to know if there's header.
*/
protected final static int MAJOR_INITIAL = 0;
/**
* State right after parser a root value has been
* finished, but next token has not yet been recognized.
*/
protected final static int MAJOR_ROOT = 1;
protected final static int MAJOR_OBJECT_FIELD = 2;
protected final static int MAJOR_OBJECT_VALUE = 3;
protected final static int MAJOR_ARRAY_ELEMENT = 4;
/**
* State after non-blocking input source has indicated that no more input
* is forthcoming AND we have exhausted all the input
*/
protected final static int MAJOR_CLOSED = 5;
// // // "Sub-states"
protected final static int MINOR_HEADER_INITIAL = 1;
protected final static int MINOR_HEADER_INLINE = 2;
protected final static int MINOR_FIELD_NAME_2BYTE = 3;
protected final static int MINOR_FIELD_NAME_LONG = 4;
protected final static int MINOR_FIELD_NAME_SHORT_ASCII = 5;
protected final static int MINOR_FIELD_NAME_SHORT_UNICODE = 6;
protected final static int MINOR_VALUE_NUMBER_INT = 7;
protected final static int MINOR_VALUE_NUMBER_LONG = 8;
protected final static int MINOR_VALUE_NUMBER_FLOAT = 9;
protected final static int MINOR_VALUE_NUMBER_DOUBLE = 10;
protected final static int MINOR_VALUE_NUMBER_BIGINT_LEN = 11;
protected final static int MINOR_VALUE_NUMBER_BIGINT_BODY = 12;
protected final static int MINOR_VALUE_NUMBER_BIGDEC_SCALE = 13;
protected final static int MINOR_VALUE_NUMBER_BIGDEC_LEN = 14;
protected final static int MINOR_VALUE_NUMBER_BIGDEC_BODY = 15;
protected final static int MINOR_VALUE_STRING_SHORT_ASCII = 16;
protected final static int MINOR_VALUE_STRING_SHORT_UNICODE = 17;
protected final static int MINOR_VALUE_STRING_LONG_ASCII = 18;
protected final static int MINOR_VALUE_STRING_LONG_UNICODE = 19;
protected final static int MINOR_VALUE_STRING_SHARED_2BYTE = 20;
protected final static int MINOR_VALUE_BINARY_RAW_LEN = 21;
protected final static int MINOR_VALUE_BINARY_RAW_BODY = 22;
protected final static int MINOR_VALUE_BINARY_7BIT_LEN = 23;
protected final static int MINOR_VALUE_BINARY_7BIT_BODY = 24;
/*
/**********************************************************************
/* Additional parsing state
/**********************************************************************
*/
/**
* Current main decoding state
*/
protected int _majorState;
/**
* Addition indicator within state; contextually relevant for just that state
*/
protected int _minorState;
/**
* Value of {@link #_majorState} after completing a scalar value
*/
protected int _majorStateAfterValue;
/**
* Flag that is sent when calling application indicates that there will
* be no more input to parse.
*/
protected boolean _endOfInput = false;
/*
/**********************************************************************
/* Other buffering
/**********************************************************************
*/
/**
* Temporary buffer for holding content if input not contiguous (but can
* fit in buffer)
*/
protected byte[] _inputCopy;
/**
* Number of bytes buffered in <code>_inputCopy</code>
*/
protected int _inputCopyLen;
/**
* Temporary storage for 32-bit values (int, float), as well as length markers
* for length-prefixed values.
*/
protected int _pending32;
/**
* Temporary storage for 64-bit values (long, double), secondary storage
* for some other things (scale of BigDecimal values)
*/
protected long _pending64;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
* @since 2.16
*/
protected NonBlockingParserBase(IOContext ctxt, int parserFeatures, int smileFeatures,
ByteQuadsCanonicalizer sym)
{
super(ctxt, parserFeatures, smileFeatures, sym);
// We don't need a lot; for most things maximum known a-priori length below 70 bytes
_inputCopy = ctxt.allocReadIOBuffer(500);
_updateTokenToNull();
_majorState = MAJOR_INITIAL;
}
@Override
public ObjectCodec getCodec() {
return null;
}
@Override
public void setCodec(ObjectCodec c) {
throw new UnsupportedOperationException("Can not use ObjectMapper with non-blocking parser");
}
/**
* @since 2.9
*/
@Override
public boolean canParseAsync() { return true; }
/**
* Method called to release internal buffers owned by the base
* reader. This may be called along with {@link #_closeInput} (for
* example, when explicitly closing this reader instance), or
* separately (if need be).
*/
@Override
protected void _releaseBuffers2()
{
byte[] b = _inputCopy;
if (b != null) {
_inputCopy = null;
_ioContext.releaseReadIOBuffer(b);
}
}
/*
/**********************************************************
/* Test support
/**********************************************************
*/
protected ByteQuadsCanonicalizer symbolTableForTests() {
return _symbols;
}
/*
/**********************************************************
/* Abstract methods from JsonParser
/**********************************************************
*/
@Override
public abstract int releaseBuffered(OutputStream out) throws IOException;
@Override
public Object getInputSource() {
// since input is "pushed", to traditional source...
return null;
}
@Override
protected void _closeInput() throws IOException {
// nothing to do here
}
/*
/**********************************************************************
/* Abstract methods from SmileParserBase
/**********************************************************************
*/
// No incomplete values yet -- although may want to make BigInt/BigDec lazy
// in future
@Override
protected void _parseNumericValue() throws IOException {
if (_currToken == JsonToken.VALUE_NUMBER_INT || _currToken == JsonToken.VALUE_NUMBER_FLOAT) {
return;
}
_reportError("Current token (%s) not numeric, can not use numeric value accessors", _currToken);
}
/*
/**********************************************************************
/* Overridden methods
/**********************************************************************
*/
@Override
public boolean hasTextCharacters()
{
if (_currToken == JsonToken.VALUE_STRING) {
// yes; is or can be made available efficiently as char[]
return _textBuffer.hasTextAsCharacters();
}
if (_currToken == JsonToken.FIELD_NAME) {
// not necessarily; possible but:
return _nameCopied;
}
// other types, no benefit from accessing as char[]
return false;
}
/*
/**********************************************************************
/* Public API, access to token information, text
/**********************************************************************
*/
/**
* Method for accessing textual representation of the current event;
* if no current event (before first call to {@link #nextToken}, or
* after encountering end-of-input), returns null.
* Method can be called for any event.
*/
@Override
public String getText() throws IOException
{
if (_currToken == JsonToken.VALUE_STRING) {
return _textBuffer.contentsAsString();
}
JsonToken t = _currToken;
if (t == null || _currToken == JsonToken.NOT_AVAILABLE) { // null only before/after document
return null;
}
if (t == JsonToken.FIELD_NAME) {
return _streamReadContext.getCurrentName();
}
if (t.isNumeric()) {
// TODO: optimize?
return getNumberValue().toString();
}
return _currToken.asString();
}
@Override
public char[] getTextCharacters() throws IOException
{
switch (currentTokenId()) {
case JsonTokenId.ID_STRING:
return _textBuffer.getTextBuffer();
case JsonTokenId.ID_FIELD_NAME:
if (!_nameCopied) {
String name = _streamReadContext.getCurrentName();
int nameLen = name.length();
if (_nameCopyBuffer == null) {
_nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen);
} else if (_nameCopyBuffer.length < nameLen) {
_nameCopyBuffer = new char[nameLen];
}
name.getChars(0, nameLen, _nameCopyBuffer, 0);
_nameCopied = true;
}
return _nameCopyBuffer;
case JsonTokenId.ID_NUMBER_INT:
case JsonTokenId.ID_NUMBER_FLOAT:
return getNumberValue().toString().toCharArray();
case JsonTokenId.ID_NO_TOKEN:
case JsonTokenId.ID_NOT_AVAILABLE:
return null;
default:
return _currToken.asCharArray();
}
}
@Override
public int getTextLength() throws IOException
{
switch (currentTokenId()) {
case JsonTokenId.ID_STRING:
return _textBuffer.size();
case JsonTokenId.ID_FIELD_NAME:
return _streamReadContext.getCurrentName().length();
case JsonTokenId.ID_NUMBER_INT:
case JsonTokenId.ID_NUMBER_FLOAT:
return getNumberValue().toString().length();
case JsonTokenId.ID_NO_TOKEN:
case JsonTokenId.ID_NOT_AVAILABLE:
return 0; // or throw exception?
default:
final char[] ch = _currToken.asCharArray();
if (ch != null) {
return ch.length;
}
return 0;
}
}
@Override
public int getTextOffset() throws IOException {
return 0;
}
@Override
public int getText(Writer w) throws IOException
{
if (_currToken == JsonToken.VALUE_STRING) {
return _textBuffer.contentsToWriter(w);
}
if (_currToken == JsonToken.NOT_AVAILABLE) {
_reportError("Current token not available: can not call this method");
}
// otherwise default handling works fine
return super.getText(w);
}
/*
/**********************************************************************
/* Public API, access to token information, binary
/**********************************************************************
*/
@Override
public byte[] getBinaryValue(Base64Variant b64variant) throws IOException
{
if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
_reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
}
return _binaryValue;
}
@Override
public Object getEmbeddedObject() throws IOException
{
if (_currToken == JsonToken.VALUE_EMBEDDED_OBJECT ) {
return _binaryValue;
}
return null;
}
@Override
public int readBinaryValue(Base64Variant b64variant, OutputStream out)
throws IOException {
if (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT ) {
_reportError("Current token (%s) not VALUE_EMBEDDED_OBJECT, can not access as binary", _currToken);
}
out.write(_binaryValue);
return _binaryValue.length;
}
/*
/**********************************************************************
/* Handling of nested scope, state
/**********************************************************************
*/
protected final JsonToken _startArrayScope() throws IOException
{
_streamReadContext = _streamReadContext.createChildArrayContext(-1, -1);
_majorState = MAJOR_ARRAY_ELEMENT;
_majorStateAfterValue = MAJOR_ARRAY_ELEMENT;
return _updateToken(JsonToken.START_ARRAY);
}
protected final JsonToken _startObjectScope() throws IOException
{
_streamReadContext = _streamReadContext.createChildObjectContext(-1, -1);
_majorState = MAJOR_OBJECT_FIELD;
_majorStateAfterValue = MAJOR_OBJECT_FIELD;
return _updateToken(JsonToken.START_OBJECT);
}
protected final JsonToken _closeArrayScope() throws IOException
{
if (!_streamReadContext.inArray()) {
_reportMismatchedEndMarker(']', '}');
}
JsonReadContext ctxt = _streamReadContext.getParent();
_streamReadContext = ctxt;
int st;
if (ctxt.inObject()) {
st = MAJOR_OBJECT_FIELD;
} else if (ctxt.inArray()) {
st = MAJOR_ARRAY_ELEMENT;
} else {
st = MAJOR_ROOT;
}
_majorState = st;
_majorStateAfterValue = st;
return _updateToken(JsonToken.END_ARRAY);
}
protected final JsonToken _closeObjectScope() throws IOException
{
if (!_streamReadContext.inObject()) {
_reportMismatchedEndMarker('}', ']');
}
JsonReadContext ctxt = _streamReadContext.getParent();
_streamReadContext = ctxt;
int st;
if (ctxt.inObject()) {
st = MAJOR_OBJECT_FIELD;
} else if (ctxt.inArray()) {
st = MAJOR_ARRAY_ELEMENT;
} else {
st = MAJOR_ROOT;
}
_majorState = st;
_majorStateAfterValue = st;
return _updateToken(JsonToken.END_OBJECT);
}
/*
/**********************************************************************
/* Internal methods, field name parsing
/**********************************************************************
*/
// Helper method for trying to find specified encoded UTF-8 byte sequence
// from symbol table; if successful avoids actual decoding to String
protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException
{
// First: maybe we already have this name decoded?
if (len < 5) {
int q = inBuf[inPtr] & 0xFF;
if (--len > 0) {
q = (q << 8) + (inBuf[++inPtr] & 0xFF);
if (--len > 0) {
q = (q << 8) + (inBuf[++inPtr] & 0xFF);
if (--len > 0) {
q = (q << 8) + (inBuf[++inPtr] & 0xFF);
}
}
}
_quad1 = q;
return _symbols.findName(q);
}
if (len < 9) {
// First quadbyte is easy
int q1 = (inBuf[inPtr] & 0xFF) << 8;
q1 += (inBuf[++inPtr] & 0xFF);
q1 <<= 8;
q1 += (inBuf[++inPtr] & 0xFF);
q1 <<= 8;
q1 += (inBuf[++inPtr] & 0xFF);
int q2 = (inBuf[++inPtr] & 0xFF);
len -= 5;
if (len > 0) {
q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
if (--len > 0) {
q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
if (--len > 0) {
q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
}
}
}
_quad1 = q1;
_quad2 = q2;
return _symbols.findName(q1, q2);
}
return _findDecodedLonger(inBuf, inPtr, len);
}
// Method for locating names longer than 8 bytes (in UTF-8)
private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException
{
// first, need enough buffer to store bytes as ints:
{
int bufLen = (len + 3) >> 2;
if (bufLen > _quadBuffer.length) {
_quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4);
}
}
// then decode, full quads first
int offset = 0;
do {
int q = (inBuf[inPtr++] & 0xFF) << 8;
q |= inBuf[inPtr++] & 0xFF;
q <<= 8;
q |= inBuf[inPtr++] & 0xFF;
q <<= 8;
q |= inBuf[inPtr++] & 0xFF;
_quadBuffer[offset++] = q;
} while ((len -= 4) > 3);
// and then leftovers
if (len > 0) {
int q = inBuf[inPtr] & 0xFF;
if (--len > 0) {
q = (q << 8) + (inBuf[++inPtr] & 0xFF);
if (--len > 0) {
q = (q << 8) + (inBuf[++inPtr] & 0xFF);
}
}
_quadBuffer[offset++] = q;
}
return _symbols.findName(_quadBuffer, offset);
}
protected final String _addDecodedToSymbols(int len, String name) throws IOException
{
// 5-May-2023, ckozak: [core#1015] respect CANONICALIZE_FIELD_NAMES factory config.
if (!_symbolsCanonical) {
return name;
}
if (len < 5) {
return _symbols.addName(name, _quad1);
}
if (len < 9) {
return _symbols.addName(name, _quad1, _quad2);
}
int qlen = (len + 3) >> 2;
return _symbols.addName(name, _quadBuffer, qlen);
}
/**
* Method called to try to expand shared name area to fit one more potentially
* shared String. If area is already at its biggest size, will just clear
* the area (by setting next-offset to 0)
*/
protected final String[] _expandSeenNames(String[] oldShared)
{
int len = oldShared.length;
String[] newShared;
if (len == 0) {
newShared = new String[DEFAULT_NAME_BUFFER_LENGTH];
} else if (len == SmileConstants.MAX_SHARED_NAMES) { // too many? Just flush...
newShared = oldShared;
_seenNameCount = 0; // could also clear, but let's not yet bother
} else {
int newSize = (len == DEFAULT_STRING_VALUE_BUFFER_LENGTH) ? 256 : SmileConstants.MAX_SHARED_NAMES;
newShared = new String[newSize];
System.arraycopy(oldShared, 0, newShared, 0, oldShared.length);
}
return newShared;
}
/*
/**********************************************************************
/* Internal methods, state changes
/**********************************************************************
*/
/**
* Helper method called at point when all input has been exhausted and
* input feeder has indicated no more input will be forthcoming.
*/
protected final JsonToken _eofAsNextToken() throws IOException {
_majorState = MAJOR_CLOSED;
if (!_streamReadContext.inRoot()) {
_handleEOF();
}
close();
return _updateTokenToNull();
}
protected final JsonToken _valueComplete(JsonToken t) throws IOException
{
_majorState = _majorStateAfterValue;
return _updateToken(t);
}
protected final JsonToken _handleSharedString(int index) throws IOException
{
if (index >= _seenStringValueCount) {
_reportInvalidSharedStringValue(index);
}
_textBuffer.resetWithString(_seenStringValues[index]);
return _valueComplete(JsonToken.VALUE_STRING);
}
protected final JsonToken _handleSharedName(int index) throws IOException
{
if (index >= _seenNameCount) {
_reportInvalidSharedName(index);
}
_streamReadContext.setCurrentName(_seenNames[index]);
_majorState = MAJOR_OBJECT_VALUE;
return _updateToken(JsonToken.FIELD_NAME);
}
protected final void _addSeenStringValue(String v) throws IOException
{
if (_seenStringValueCount < _seenStringValues.length) {
_seenStringValues[_seenStringValueCount++] = v;
return;
}
_expandSeenStringValues(v);
}
private final void _expandSeenStringValues(String v)
{
String[] oldShared = _seenStringValues;
int len = oldShared.length;
String[] newShared;
if (len == 0) {
newShared = new String[DEFAULT_STRING_VALUE_BUFFER_LENGTH];
} else if (len == SmileConstants.MAX_SHARED_STRING_VALUES) { // too many? Just flush...
newShared = oldShared;
_seenStringValueCount = 0; // could also clear, but let's not yet bother
} else {
int newSize = (len == DEFAULT_NAME_BUFFER_LENGTH) ? 256 : SmileConstants.MAX_SHARED_STRING_VALUES;
newShared = new String[newSize];
System.arraycopy(oldShared, 0, newShared, 0, oldShared.length);
}
_seenStringValues = newShared;
_seenStringValues[_seenStringValueCount++] = v;
}
/*
/**********************************************************
/* Internal/package methods: shared/reusable builders
/**********************************************************
*/
/**
* ByteArrayBuilder is needed if 'getBinaryValue' is called. If so,
* we better reuse it for remainder of content.
*/
protected ByteArrayBuilder _byteArrayBuilder = null;
public void _initByteArrayBuilder()
{
if (_byteArrayBuilder == null) {
_byteArrayBuilder = new ByteArrayBuilder();
} else {
_byteArrayBuilder.reset();
}
}
/*
/**********************************************************************
/* Internal methods, error reporting
/**********************************************************************
*/
protected void _reportMissingHeader(int unmaskedFirstByte) throws IOException
{
String msg;
int b = unmaskedFirstByte & 0xFF;
// let's speculate on problem a bit, too
if (b == '{' || b == '[') {
msg = "Input does not start with Smile format header (first byte = 0x"
+Integer.toHexString(b & 0xFF)+") -- rather, it starts with '"+((char) b)
+"' (plain JSON input?) -- can not parse";
} else {
msg = "Input does not start with Smile format header (first byte = 0x"
+Integer.toHexString(b & 0xFF)+") and parser has REQUIRE_HEADER enabled: can not parse";
}
throw new JsonParseException(this, msg);
}
protected void _reportInvalidSharedName(int index) throws IOException
{
if (_seenNames == null) {
_reportError("Encountered shared name reference, even though document header explicitly declared no shared name references are included");
}
_reportError("Invalid shared name reference %d; only got %d names in buffer (invalid content)",
index, _seenNameCount);
}
protected void _reportInvalidSharedStringValue(int index) throws IOException
{
if (_seenStringValues == null) {
_reportError("Encountered shared text value reference, even though document header did not declare shared text value references may be included");
}
_reportError("Invalid shared text value reference %d; only got %s names in buffer (invalid content)",
index, _seenStringValueCount);
}
protected void _reportInvalidInitial(int mask) throws JsonParseException {
_reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask));
}
protected void _reportInvalidOther(int mask, int ptr) throws JsonParseException {
_inputPtr = ptr;
_reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask));
}
}