NonBlockingByteArrayParser.java
package com.fasterxml.jackson.dataformat.smile.async;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Arrays;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.async.ByteArrayFeeder;
import com.fasterxml.jackson.core.async.NonBlockingInputFeeder;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
import com.fasterxml.jackson.core.util.VersionUtil;
import com.fasterxml.jackson.dataformat.smile.SmileConstants;
import com.fasterxml.jackson.dataformat.smile.SmileParser;
import com.fasterxml.jackson.dataformat.smile.SmileUtil;
import static com.fasterxml.jackson.dataformat.smile.SmileConstants.BYTE_MARKER_END_OF_STRING;
public class NonBlockingByteArrayParser
extends NonBlockingParserBase
implements ByteArrayFeeder
{
/*
/**********************************************************************
/* Input source config
/**********************************************************************
*/
/**
* This buffer is actually provided via {@link NonBlockingInputFeeder}
*/
protected byte[] _inputBuffer = NO_BYTES;
/**
* In addition to current buffer pointer, and end pointer,
* we will also need to know number of bytes originally
* contained. This is needed to correctly update location
* information when the block has been completed.
*/
protected int _origBufferLen;
// And from ParserBase:
// protected int _inputPtr;
// protected int _inputEnd;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
public NonBlockingByteArrayParser(IOContext ctxt, int parserFeatures, int smileFeatures,
ByteQuadsCanonicalizer sym)
{
super(ctxt, parserFeatures, smileFeatures, sym);
}
/*
/**********************************************************************
/* AsyncInputFeeder impl
/**********************************************************************
*/
@Override
public ByteArrayFeeder getNonBlockingInputFeeder() {
return this;
}
@Override
public final boolean needMoreInput() {
return (_inputPtr >=_inputEnd) && !_endOfInput;
}
@Override
public void feedInput(byte[] buf, int start, int end) throws IOException
{
// Must not have remaining input
if (_inputPtr < _inputEnd) {
_reportError("Still have %d undecoded bytes, should not call 'feedInput'", _inputEnd - _inputPtr);
}
if (end < start) {
_reportError("Input end (%d) may not be before start (%d)", end, start);
}
// and shouldn't have been marked as end-of-input
if (_endOfInput) {
_reportError("Already closed, can not feed more input");
}
// Time to update pointers first
_currInputProcessed += _origBufferLen;
_streamReadConstraints.validateDocumentLength(_currInputProcessed);
// And then update buffer settings
_inputBuffer = buf;
_inputPtr = start;
_inputEnd = end;
_origBufferLen = end - start;
}
@Override
public void endOfInput() {
_endOfInput = true;
}
/*
/**********************************************************************
/* Abstract methods/overrides from JsonParser
/**********************************************************************
*/
/* Implementing these methods efficiently for non-blocking cases would
* be complicated; so for now let's just use the default non-optimized
* implementation
*/
// public boolean nextFieldName(SerializableString str) throws IOException
// public String nextTextValue() throws IOException
// public int nextIntValue(int defaultValue) throws IOException
// public long nextLongValue(long defaultValue) throws IOException
// public Boolean nextBooleanValue() throws IOException
@Override
public int releaseBuffered(OutputStream out) throws IOException {
int avail = _inputEnd - _inputPtr;
if (avail > 0) {
out.write(_inputBuffer, _inputPtr, avail);
}
return avail;
}
/*
/**********************************************************************
/* Main-level decoding
/**********************************************************************
*/
@Override
public JsonToken nextToken() throws IOException
{
// First: regardless of where we really are, need at least one more byte;
// can simplify some of the checks by short-circuiting right away
if (_inputPtr >= _inputEnd) {
if (_closed) {
return null;
}
// note: if so, do not even bother changing state
if (_endOfInput) { // except for this special case
return _eofAsNextToken();
}
return JsonToken.NOT_AVAILABLE;
}
// in the middle of tokenization?
if (_currToken == JsonToken.NOT_AVAILABLE) {
return _finishToken();
}
// No: fresh new token; may or may not have existing one
_numTypesValid = NR_UNKNOWN;
// _tokenInputTotal = _currInputProcessed + _inputPtr;
// also: clear any data retained so far
_binaryValue = null;
int ch = _inputBuffer[_inputPtr++];
switch (_majorState) {
case MAJOR_INITIAL:
if (SmileConstants.HEADER_BYTE_1 == ch) { // yes, initial header; should be good
// minor state as 0, which is fine
_majorState = MAJOR_ROOT;
_minorState = MINOR_HEADER_INITIAL;
return _finishHeader(0);
}
if (SmileParser.Feature.REQUIRE_HEADER.enabledIn(_formatFeatures)) {
_reportMissingHeader(ch);
}
// otherwise fine, just drop through to next state
// (NOTE: it double-checks header; fine, won't match; just need the rest)
_majorState = MAJOR_ROOT;
return _startValue(ch);
case MAJOR_ROOT: //
if (SmileConstants.HEADER_BYTE_1 == ch) { // looks like a header
_minorState = MINOR_HEADER_INLINE;
return _finishHeader(0);
}
return _startValue(ch);
case MAJOR_OBJECT_FIELD: // field or end-object
// expect name
return _startFieldName(ch);
case MAJOR_OBJECT_VALUE:
case MAJOR_ARRAY_ELEMENT: // element or end-array
return _startValue(ch);
default:
}
VersionUtil.throwInternal();
return null;
}
/**
* Method called when a (scalar) value type has been detected, but not all of
* contents have been decoded due to incomplete input available.
*/
protected final JsonToken _finishToken() throws IOException
{
// NOTE: caller ensures availability of at least one byte
switch (_minorState) {
case MINOR_HEADER_INITIAL:
case MINOR_HEADER_INLINE:
return _finishHeader(_pending32);
case MINOR_FIELD_NAME_2BYTE:
return _handleSharedName(_pending32 + (_inputBuffer[_inputPtr++] & 0xFF));
case MINOR_FIELD_NAME_LONG:
return _finishLongFieldName(_inputCopyLen);
case MINOR_FIELD_NAME_SHORT_ASCII:
case MINOR_FIELD_NAME_SHORT_UNICODE:
{
final int fullLen = _pending32;
final int needed = fullLen - _inputCopyLen;
final int avail = _inputEnd - _inputPtr;
if (avail >= needed) { // got it all
System.arraycopy(_inputBuffer, _inputPtr, _inputCopy, _inputCopyLen, needed);
_inputPtr += needed;
String name = _findDecodedFromSymbols(_inputCopy, 0, fullLen);
if (name == null) {
name = (_minorState == MINOR_FIELD_NAME_SHORT_ASCII)
? _decodeASCIIText(_inputCopy, 0, fullLen)
: _decodeShortUnicodeText(_inputCopy, 0, fullLen)
;
name = _addDecodedToSymbols(fullLen, name);
}
// either way, may need to keep a copy for possible back-ref
if (_seenNames != null) {
if (_seenNameCount >= _seenNames.length) {
_seenNames = _expandSeenNames(_seenNames);
}
_seenNames[_seenNameCount++] = name;
}
_streamReadContext.setCurrentName(name);
_majorState = MAJOR_OBJECT_VALUE;
return _updateToken(JsonToken.FIELD_NAME);
}
// Otherwise append to buffer, not done
System.arraycopy(_inputBuffer, _inputPtr, _inputCopy, _inputCopyLen, avail);
_inputPtr += avail;
_inputCopyLen += avail;
}
return JsonToken.NOT_AVAILABLE;
case MINOR_VALUE_NUMBER_INT:
return _finishInt(_pending32, _inputCopyLen);
case MINOR_VALUE_NUMBER_LONG:
return _finishLong(_pending64, _inputCopyLen);
case MINOR_VALUE_NUMBER_BIGINT_LEN:
return _finishBigIntLen(_pending32, _inputCopyLen);
case MINOR_VALUE_NUMBER_BIGINT_BODY:
return _finishBigIntBody();
case MINOR_VALUE_NUMBER_FLOAT:
return _finishFloat(_pending32, _inputCopyLen);
case MINOR_VALUE_NUMBER_DOUBLE:
return _finishDouble(_pending64, _inputCopyLen);
case MINOR_VALUE_NUMBER_BIGDEC_SCALE:
return _finishBigDecimalScale((int) _pending64, _inputCopyLen);
case MINOR_VALUE_NUMBER_BIGDEC_LEN:
return _finishBigDecimalLen(_pending32, _inputCopyLen);
case MINOR_VALUE_NUMBER_BIGDEC_BODY:
return _finishBigDecimalBody();
case MINOR_VALUE_STRING_SHORT_ASCII:
case MINOR_VALUE_STRING_SHORT_UNICODE:
{
final int fullLen = _pending32;
final int needed = fullLen - _inputCopyLen;
final int avail = _inputEnd - _inputPtr;
if (avail >= needed) { // got it all
System.arraycopy(_inputBuffer, _inputPtr, _inputCopy, _inputCopyLen, needed);
_inputPtr += needed;
String text = (_minorState == MINOR_FIELD_NAME_SHORT_ASCII)
? _decodeASCIIText(_inputCopy, 0, fullLen)
: _decodeShortUnicodeText(_inputCopy, 0, fullLen);
if (_seenStringValueCount >= 0) { // shared text values enabled
_addSeenStringValue(text);
}
return _valueComplete(JsonToken.VALUE_STRING);
}
// Otherwise append to buffer, not done
System.arraycopy(_inputBuffer, _inputPtr, _inputCopy, _inputCopyLen, avail);
_inputPtr += avail;
_inputCopyLen += avail;
}
return JsonToken.NOT_AVAILABLE;
case MINOR_VALUE_STRING_LONG_ASCII:
return _finishLongASCII();
case MINOR_VALUE_STRING_LONG_UNICODE:
return _finishLongUnicode();
case MINOR_VALUE_STRING_SHARED_2BYTE:
return _handleSharedString(_pending32 + (_inputBuffer[_inputPtr++] & 0xFF));
case MINOR_VALUE_BINARY_RAW_LEN:
return _finishRawBinaryLen(_pending32, _inputCopyLen);
case MINOR_VALUE_BINARY_RAW_BODY:
return _finishRawBinaryBody();
case MINOR_VALUE_BINARY_7BIT_LEN:
return _finish7BitBinaryLen(_pending32, _inputCopyLen);
case MINOR_VALUE_BINARY_7BIT_BODY:
return _finish7BitBinaryBody();
default:
}
throw new IllegalStateException("Illegal state when trying to complete token: majorState="+_majorState);
}
/*
/**********************************************************************
/* Second-level decoding
/**********************************************************************
*/
/**
* Helper method that will decode information from a header block that has been
* detected.
*/
protected JsonToken _finishHeader(int state) throws IOException
{
int ch = 0;
String errorDesc = null;
switch (state) {
case 0:
if (_inputPtr >= _inputEnd) {
_pending32 = state;
return _updateTokenToNA();
}
ch = _inputBuffer[_inputPtr++];
if (ch!= SmileConstants.HEADER_BYTE_2) {
errorDesc = "Malformed content: signature not valid, starts with 0x3a but followed by 0x%s, not 0x29";
break;
}
state = 1;
// fall through
case 1:
if (_inputPtr >= _inputEnd) {
_pending32 = state;
return _updateTokenToNA();
}
ch = _inputBuffer[_inputPtr++];
if (ch != SmileConstants.HEADER_BYTE_3) {
errorDesc = "Malformed content: signature not valid, starts with 0x3a, 0x29, but followed by 0x%s not 0x0A";
break;
}
state = 2;
case 2:
if (_inputPtr >= _inputEnd) {
_pending32 = state;
return _updateTokenToNA();
}
ch = _inputBuffer[_inputPtr++];
{
int versionBits = (ch >> 4) & 0x0F;
// but failure with version number is fatal, can not ignore
if (versionBits != SmileConstants.HEADER_VERSION_0) {
_reportError("Header version number bits (0x%s) indicate unrecognized version; only 0x0 handled by parser",
Integer.toHexString(versionBits));
}
// can avoid tracking names, if explicitly disabled
if ((ch & SmileConstants.HEADER_BIT_HAS_SHARED_NAMES) == 0) {
_seenNames = null;
_seenNameCount = -1;
}
// conversely, shared string values must be explicitly enabled
if ((ch & SmileConstants.HEADER_BIT_HAS_SHARED_STRING_VALUES) != 0) {
_seenStringValues = NO_STRINGS;
_seenStringValueCount = 0;
}
_mayContainRawBinary = ((ch & SmileConstants.HEADER_BIT_HAS_RAW_BINARY) != 0);
}
_majorState = MAJOR_ROOT;
_updateTokenToNull();
// Mild difference here: initial marker not reported separately, but in-line
// ones need to be reported as `null` tokens as they are logical document end
// markers (although should be collated with actual end markers)
if (_minorState == MINOR_HEADER_INLINE) {
return null;
}
// Ok to use recursion in case of initial header, as well:
return nextToken();
default:
}
_reportError(errorDesc, Integer.toHexString(ch));
return null;
}
/**
* Helper method called to detect type of a value token (at any level), and possibly
* decode it if contained in input buffer.
* Note that possible header has been ruled out by caller and is not checked here.
*/
private final JsonToken _startValue(int ch) throws IOException
{
main_switch:
switch ((ch >> 5) & 0x7) {
case 0: // short shared string value reference
if (ch == 0) { // important: this is invalid, don't accept
_reportError("Invalid token byte 0x00");
}
return _handleSharedString(ch-1);
case 1: // simple literals, numbers
_numTypesValid = 0;
switch (ch & 0x1F) {
case 0x00:
_textBuffer.resetWithEmpty();
return _valueComplete(JsonToken.VALUE_STRING);
case 0x01:
return _valueComplete(JsonToken.VALUE_NULL);
case 0x02: // false
return _valueComplete(JsonToken.VALUE_FALSE);
case 0x03: // 0x03 == true
return _valueComplete(JsonToken.VALUE_TRUE);
case 0x04:
return _startInt();
case 0x05:
return _startLong();
case 0x06:
return _startBigInt();
case 0x07: // illegal
break;
case 0x08:
return _startFloat();
case 0x09:
return _startDouble();
case 0x0A:
return _startBigDecimal();
case 0x0B: // illegal
break;
case 0x1A:
// == 0x3A == ':' -> possibly switch; but should be handled elsewhere so...
break main_switch;
}
// and everything else is reserved, for now
break;
case 2: // tiny ASCII
// fall through
case 3: // short ASCII
// fall through
return _startShortASCII(1 + (ch & 0x3F));
case 4: // tiny Unicode
// fall through
case 5: // short Unicode
return _startShortUnicode(2 + (ch & 0x3F));
case 6: // small integers; zigzag encoded
_numberInt = SmileUtil.zigzagDecode(ch & 0x1F);
_numTypesValid = NR_INT;
_numberType = NumberType.INT;
return _valueComplete(JsonToken.VALUE_NUMBER_INT);
case 7: // binary/long-text/long-shared/start-end-markers
switch (ch & 0x1F) {
case 0x00: // long variable length ASCII
return _startLongASCII();
case 0x04: // long variable length unicode
return _startLongUnicode();
case 0x08: // binary, 7-bit
return _start7BitBinary();
case 0x0C: // long shared string
case 0x0D:
case 0x0E:
case 0x0F:
{
ch = (ch & 0x3) << 8;
if (_inputPtr < _inputEnd) {
return _handleSharedString(ch + (_inputBuffer[_inputPtr++] & 0xFF));
}
}
// did not get it all; mark the state so we know where to return:
_pending32 = ch;
_minorState = MINOR_VALUE_STRING_SHARED_2BYTE;
return _updateTokenToNA();
case 0x18: // START_ARRAY
return _startArrayScope();
case 0x19: // END_ARRAY
return _closeArrayScope();
case 0x1A: // START_OBJECT
return _startObjectScope();
case 0x1B: // not used in this mode; would be END_OBJECT
_reportError("Invalid type marker byte 0xFB in value mode (would be END_OBJECT in key mode)");
case 0x1D: // binary, raw
// should we validate this is legal? (as per header)
return _startRawBinary();
case 0x1F: // 0xFF, end of content
return _updateTokenToNull();
}
break;
}
// If we get this far, type byte is corrupt
_reportError("Invalid type marker byte 0x%02x for expected value token", ch & 0xFF);
return null;
}
/*
/**********************************************************************
/* Second-level decoding, Name decoding
/**********************************************************************
*/
/**
* Method that handles initial token type recognition for token
* that has to be either FIELD_NAME or END_OBJECT.
*/
protected final JsonToken _startFieldName(int ch) throws IOException
{
switch ((ch >> 6) & 3) {
case 0: // misc, including end marker
switch (ch) {
case 0x20: // empty String as name, legal if unusual
_streamReadContext.setCurrentName("");
_majorState = MAJOR_OBJECT_VALUE;
return _updateToken(JsonToken.FIELD_NAME);
case 0x30: // long shared
case 0x31:
case 0x32:
case 0x33:
if (_inputPtr < _inputEnd) {
return _handleSharedName(((ch & 0x3) << 8) + (_inputBuffer[_inputPtr++] & 0xFF));
}
{
_minorState = MINOR_FIELD_NAME_2BYTE;
_pending32 = (ch & 0x3) << 8;
return _updateTokenToNA();
}
case 0x34: // long ASCII/Unicode name
return _finishLongFieldName(0);
}
break;
case 1: // short shared, can fully process
return _handleSharedName(ch & 0x3F);
case 2: // short ASCII; possibly doable
{
final int len = 1 + (ch & 0x3f);
final int inputPtr = _inputPtr;
final int left = _inputEnd - inputPtr;
if (len <= left) { // gotcha!
_inputPtr = inputPtr + len;
String name = _findDecodedFromSymbols(_inputBuffer, inputPtr, len);
if (name == null) {
name = _decodeASCIIText(_inputBuffer, inputPtr, len);
name = _addDecodedToSymbols(len, name);
}
// either way, may need to keep a copy for possible back-ref
if (_seenNames != null) {
if (_seenNameCount >= _seenNames.length) {
_seenNames = _expandSeenNames(_seenNames);
}
_seenNames[_seenNameCount++] = name;
}
_streamReadContext.setCurrentName(name);
_majorState = MAJOR_OBJECT_VALUE;
return _updateToken(JsonToken.FIELD_NAME);
}
// Nope: need to copy
_pending32 = len;
_inputCopyLen = left;
if (left > 0) {
_inputPtr = inputPtr + left;
System.arraycopy(_inputBuffer, inputPtr, _inputCopy, 0, left);
}
}
_minorState = MINOR_FIELD_NAME_SHORT_ASCII;
return _updateTokenToNA();
case 3: // short Unicode; possibly doable
// all valid, except for 0xFF
ch &= 0x3F;
{
if (ch > 0x37) {
if (ch == 0x3B) {
return _closeObjectScope();
}
// error, but let's not worry about that here
break;
}
final int len = ch + 2; // values from 2 to 57...
final int inputPtr = _inputPtr;
final int left = _inputEnd - inputPtr;
if (len <= left) { // gotcha!
_inputPtr = inputPtr + len;
String name = _findDecodedFromSymbols(_inputBuffer, inputPtr, len);
if (name == null) {
name = _decodeShortUnicodeText(_inputBuffer, inputPtr, len);
name = _addDecodedToSymbols(len, name);
}
if (_seenNames != null) {
if (_seenNameCount >= _seenNames.length) {
_seenNames = _expandSeenNames(_seenNames);
}
_seenNames[_seenNameCount++] = name;
}
_streamReadContext.setCurrentName(name);
_majorState = MAJOR_OBJECT_VALUE;
return _updateToken(JsonToken.FIELD_NAME);
}
// Nope: need to copy
_pending32 = len;
_inputCopyLen = left;
if (left > 0) {
_inputPtr = inputPtr + left;
System.arraycopy(_inputBuffer, inputPtr, _inputCopy, 0, left);
}
_minorState = MINOR_FIELD_NAME_SHORT_UNICODE;
return _updateTokenToNA();
}
}
// Other byte values are illegal
_reportError("Invalid type marker byte 0x%02x for expected field name (or END_OBJECT marker)", ch & 0xFF);
return null;
}
private final JsonToken _finishLongFieldName(int outPtr) throws IOException
{
byte[] srcBuffer = _inputBuffer;
byte[] copyBuffer = _inputCopy;
int srcPtr = _inputPtr;
copy_loop:
while (true) {
int max = Math.min(_inputEnd - srcPtr, copyBuffer.length - outPtr);
final int inputEnd = srcPtr + max;
while (srcPtr < inputEnd) {
byte b = srcBuffer[srcPtr++];
if (b == BYTE_MARKER_END_OF_STRING) {
break copy_loop;
}
copyBuffer[outPtr++] = b;
}
// If end of input, bail out
if (srcPtr == _inputEnd) {
_inputPtr = srcPtr;
_minorState = MINOR_FIELD_NAME_LONG;
_inputCopyLen = outPtr;
return _updateTokenToNA();
}
// otherwise increase copy buffer length
int oldLen = copyBuffer.length;
int incr = Math.min(64000, oldLen >> 1);
_inputCopy = copyBuffer = Arrays.copyOf(_inputCopy, oldLen + incr);
// and loop again
}
// But if we get here, we got it all, only need to create quads etc
_inputPtr = srcPtr;
int[] quads = _quadBuffer;
int qlen = (outPtr + 3) >> 2; // last quad may be partial
if (quads.length < qlen) {
_quadBuffer = quads = Arrays.copyOf(quads, qlen + 16);
}
int in = 0;
int quadCount = 0;
for (final int inEnd = (outPtr & ~3); in < inEnd; in += 4) {
int q = (copyBuffer[in] << 24)
| ((copyBuffer[in+1] & 0xFF) << 16)
| ((copyBuffer[in+2] & 0xFF) << 8)
| (copyBuffer[in+3] & 0xFF);
quads[quadCount++] = q;
}
// and possibly more... ?
if (in < outPtr) { // at least 1
int q = copyBuffer[in++] & 0xFF;
if (in < outPtr) { // at least 2
q = (q << 8) | (copyBuffer[in++] & 0xFF);
if (in < outPtr) { // 3 (can't be more)
q = (q << 8) | (copyBuffer[in++] & 0xFF);
}
}
quads[quadCount++] = q;
}
String name = _symbols.findName(quads, quadCount);
if (name == null) {
name = _decodeLongUnicodeName(copyBuffer, 0, outPtr);
}
if (_seenNames != null) {
if (_seenNameCount >= _seenNames.length) {
_seenNames = _expandSeenNames(_seenNames);
}
_seenNames[_seenNameCount++] = name;
}
_streamReadContext.setCurrentName(name);
_majorState = MAJOR_OBJECT_VALUE;
return _updateToken(JsonToken.FIELD_NAME);
}
/*
/**********************************************************************
/* Internal methods: second-level parsing: Strings, short (length-prefix)
/**********************************************************************
*/
private final JsonToken _startShortASCII(final int len) throws IOException
{
final int inputPtr = _inputPtr;
final int left = _inputEnd - inputPtr;
if (len <= left) { // gotcha!
_inputPtr = inputPtr + len;
String text = _decodeASCIIText(_inputBuffer, inputPtr, len);
if (_seenStringValueCount >= 0) { // shared text values enabled
_addSeenStringValue(text);
}
return _valueComplete(JsonToken.VALUE_STRING);
}
// Nope: need to copy
_pending32 = len;
_inputCopyLen = left;
if (left > 0) {
_inputPtr = inputPtr + left;
System.arraycopy(_inputBuffer, inputPtr, _inputCopy, 0, left);
}
_minorState = MINOR_VALUE_STRING_SHORT_ASCII;
return _updateTokenToNA();
}
private final JsonToken _startShortUnicode(final int len) throws IOException
{
final int inPtr = _inputPtr;
final int left = _inputEnd - inPtr;
if (len <= left) { // gotcha!
_inputPtr = inPtr + len;
String text = _decodeShortUnicodeText(_inputBuffer, inPtr, len);
if (_seenStringValueCount >= 0) { // shared text values enabled
_addSeenStringValue(text);
}
return _valueComplete(JsonToken.VALUE_STRING);
}
// Nope: need to copy
_pending32 = len;
_inputCopyLen = left;
if (left > 0) {
System.arraycopy(_inputBuffer, inPtr, _inputCopy, 0, left);
_inputPtr = inPtr + left;
}
_minorState = MINOR_VALUE_STRING_SHORT_UNICODE;
return _updateTokenToNA();
}
/*
/**********************************************************************
/* Internal methods: second-level parsing: Strings, long (end marker)
/**********************************************************************
*/
private final JsonToken _startLongASCII() throws IOException
{
int outPtr = 0;
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
while (_inputPtr < _inputEnd) {
int inPtr = _inputPtr;
int left = _inputEnd - inPtr;
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
left = Math.min(left, outBuf.length - outPtr);
do {
byte b = _inputBuffer[inPtr++];
if (b == SmileConstants.BYTE_MARKER_END_OF_STRING) {
_inputPtr = inPtr;
_textBuffer.setCurrentLength(outPtr);
return _valueComplete(JsonToken.VALUE_STRING);
}
outBuf[outPtr++] = (char) b;
} while (--left > 0);
_inputPtr = inPtr;
}
// denote current length; no partial input to save
_textBuffer.setCurrentLength(outPtr);
_minorState = MINOR_VALUE_STRING_LONG_ASCII;
return _updateTokenToNA();
}
private final JsonToken _finishLongASCII() throws IOException
{
char[] outBuf = _textBuffer.getBufferWithoutReset();
int outPtr = _textBuffer.getCurrentSegmentSize();
while (_inputPtr < _inputEnd) {
int inPtr = _inputPtr;
int left = _inputEnd - inPtr;
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
left = Math.min(left, outBuf.length - outPtr);
do {
byte b = _inputBuffer[inPtr++];
if (b == SmileConstants.BYTE_MARKER_END_OF_STRING) {
_inputPtr = inPtr;
_textBuffer.setCurrentLength(outPtr);
return _valueComplete(JsonToken.VALUE_STRING);
}
outBuf[outPtr++] = (char) b;
} while (--left > 0);
_inputPtr = inPtr;
}
// denote current length; no partial input to save
_textBuffer.setCurrentLength(outPtr);
return JsonToken.NOT_AVAILABLE;
}
protected final JsonToken _startLongUnicode() throws IOException
{
int outPtr = 0;
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
final int[] codes = SmileConstants.sUtf8UnitLengths;
int c;
final byte[] inputBuffer = _inputBuffer;
// NOTE: caller guarantees there is at least one byte available at this point!
main_loop:
while (true) {
// First the tight ASCII loop:
ascii_loop:
while (true) {
int ptr = _inputPtr;
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
int max = _inputEnd;
{
int max2 = ptr + (outBuf.length - outPtr);
if (max2 < max) {
max = max2;
}
}
while (ptr < max) {
c = inputBuffer[ptr++] & 0xFF;
if (codes[c] != 0) {
_inputPtr = ptr;
break ascii_loop;
}
outBuf[outPtr++] = (char) c;
}
_inputPtr = ptr;
if (ptr >= _inputEnd) {
_inputCopyLen = 0;
break main_loop;
}
}
// Ok: end marker, escape or multi-byte?
if (c == SmileConstants.INT_MARKER_END_OF_STRING) {
_textBuffer.setCurrentLength(outPtr);
return _valueComplete(JsonToken.VALUE_STRING);
}
// otherwise need at least one more byte, so:
if (_inputPtr >= _inputEnd) {
_pending32 = c;
_inputCopyLen = 1;
break main_loop;
}
int d = _inputBuffer[_inputPtr++];
switch (codes[c]) {
case 1: // 2-byte UTF
c = _decodeUTF8_2(c, d);
break;
case 2: // 3-byte UTF
if (_inputPtr >= _inputEnd) {
_pending32 = c;
_inputCopy[0] = (byte) d;
_inputCopyLen = 2;
break main_loop;
}
c = _decodeUTF8_3(c, d, _inputBuffer[_inputPtr++]);
break;
case 3: // 4-byte UTF
if ((_inputPtr + 1) >= _inputEnd) {
_pending32 = c;
_inputCopy[0] = (byte) d;
if (_inputPtr >= _inputEnd) {
_inputCopyLen = 2;
} else {
_inputCopy[1] = _inputBuffer[_inputPtr++];
_inputCopyLen = 3;
}
break main_loop;
}
c = _decodeUTF8_4(c, d, _inputBuffer[_inputPtr++], _inputBuffer[_inputPtr++]);
// Let's add first part right away:
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
c = 0xDC00 | (c & 0x3FF);
// And let the other char output down below
break;
default:
// Is this good enough error message?
_reportInvalidInitial(c);
}
// Need more room?
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
// Ok, let's add char to output:
outBuf[outPtr++] = (char) c;
if (_inputPtr >= _inputEnd) {
_inputCopyLen = 0; // no partially decoded UTF-8 codepoint
break;
}
}
_textBuffer.setCurrentLength(outPtr);
_minorState = MINOR_VALUE_STRING_LONG_UNICODE;
return _updateTokenToNA();
}
private final JsonToken _finishLongUnicode() throws IOException
{
// First things first: did we have partially decoded multi-byte UTF-8 character?
if (_inputCopyLen > 0) {
if (!_finishPartialUnicodeChar()) {
return JsonToken.NOT_AVAILABLE;
}
}
final int[] codes = SmileConstants.sUtf8UnitLengths;
int c;
final byte[] inputBuffer = _inputBuffer;
char[] outBuf = _textBuffer.getBufferWithoutReset();
int outPtr = _textBuffer.getCurrentSegmentSize();
main_loop:
while (true) {
// First the tight ASCII loop:
ascii_loop:
while (true) {
int ptr = _inputPtr;
// Since we have no guarantee for any content, check it first
if (ptr >= _inputEnd) {
_inputCopyLen = 0; // no partially decoded UTF-8 codepoint
break main_loop;
}
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
int max = _inputEnd;
{
int max2 = ptr + (outBuf.length - outPtr);
if (max2 < max) {
max = max2;
}
}
while (ptr < max) {
c = inputBuffer[ptr++] & 0xFF;
if (codes[c] != 0) {
_inputPtr = ptr;
break ascii_loop;
}
outBuf[outPtr++] = (char) c;
}
_inputPtr = ptr;
}
// Ok: end marker, escape or multi-byte?
if (c == SmileConstants.INT_MARKER_END_OF_STRING) {
_textBuffer.setCurrentLength(outPtr);
return _valueComplete(JsonToken.VALUE_STRING);
}
// otherwise need at least one more byte, so:
if (_inputPtr >= _inputEnd) {
_pending32 = c;
_inputCopyLen = 1;
break main_loop;
}
int d = _inputBuffer[_inputPtr++];
switch (codes[c]) {
case 1: // 2-byte UTF
c = _decodeUTF8_2(c, d);
break;
case 2: // 3-byte UTF
if (_inputPtr >= _inputEnd) {
_pending32 = c;
_inputCopy[0] = (byte) d;
_inputCopyLen = 2;
break main_loop;
}
c = _decodeUTF8_3(c, d, _inputBuffer[_inputPtr++]);
break;
case 3: // 4-byte UTF
if ((_inputPtr + 1) >= _inputEnd) {
_pending32 = c;
_inputCopy[0] = (byte) d;
if (_inputPtr >= _inputEnd) {
_inputCopyLen = 2;
} else {
_inputCopy[1] = _inputBuffer[_inputPtr++];
_inputCopyLen = 3;
}
break main_loop;
}
c = _decodeUTF8_4(c, d, _inputBuffer[_inputPtr++], _inputBuffer[_inputPtr++]);
// Let's add first part right away:
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
c = 0xDC00 | (c & 0x3FF);
// And let the other char output down below
break;
default:
// Is this good enough error message?
_reportInvalidInitial(c);
}
// Need more room?
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
}
// Ok, let's add char to output:
outBuf[outPtr++] = (char) c;
}
_textBuffer.setCurrentLength(outPtr);
return JsonToken.NOT_AVAILABLE;
}
private final boolean _finishPartialUnicodeChar() throws IOException
{
final int[] codes = SmileConstants.sUtf8UnitLengths;
int c;
// NOTE: first byte stored in `_pending32` and we know we got one more byte for sure
int next = _inputBuffer[_inputPtr++];
switch (codes[_pending32]) { // type of UTF-8 sequence (length - 1)
case 1: // 2-byte UTF
c = _decodeUTF8_2(_pending32, next);
break;
case 2: // 3-byte UTF: did we have one or two bytes?
if (_inputCopyLen == 1) {
if (_inputPtr >= _inputEnd) {
_inputCopy[0] = (byte) next;
_inputCopyLen = 2;
return false;
}
c = _decodeUTF8_3(_pending32, next, _inputBuffer[_inputPtr++]);
} else {
c = _decodeUTF8_3(_pending32, _inputCopy[0], next);
}
break;
case 3: // 4-byte UTF; had 1/2/3 bytes, now got 2/3/4
switch (_inputCopyLen) {
case 1:
if (_inputPtr >= _inputEnd) {
_inputCopy[0] = (byte) next;
_inputCopyLen = 2;
return false;
}
int i3 = _inputBuffer[_inputPtr++];
if (_inputPtr >= _inputEnd) {
_inputCopy[0] = (byte) next;
_inputCopy[1] = (byte) i3;
_inputCopyLen = 3;
return false;
}
c = _decodeUTF8_4(_pending32, next, i3, _inputBuffer[_inputPtr++]);
break;
case 2:
if (_inputPtr >= _inputEnd) {
_inputCopy[1] = (byte) next;
_inputCopyLen = 3;
return false;
}
c = _decodeUTF8_4(_pending32, _inputCopy[0], next, _inputBuffer[_inputPtr++]);
break;
case 3:
default:
c = _decodeUTF8_4(_pending32, _inputCopy[0], _inputCopy[1], next);
break;
}
// Let's add first part right away:
_textBuffer.append((char) (0xD800 | (c >> 10)));
c = 0xDC00 | (c & 0x3FF);
// And let the other char output down below
break;
default:
// Is this good enough error message?
_reportInvalidInitial(_pending32);
c = 0;
}
_inputCopyLen = 0; // just for safety
_textBuffer.append((char) c);
return true;
}
/*
/**********************************************************************
/* Internal methods, UTF8 decoding
/**********************************************************************
*/
private final int _decodeUTF8_2(int c, int d) throws IOException
{
if ((d & 0xC0) != 0x080) {
_reportInvalidOther(d & 0xFF, _inputPtr);
}
return ((c & 0x1F) << 6) | (d & 0x3F);
}
private final int _decodeUTF8_3(int c, int d, int e) throws IOException
{
c &= 0x0F;
if ((d & 0xC0) != 0x080) {
_reportInvalidOther(d & 0xFF, _inputPtr);
}
c = (c << 6) | (d & 0x3F);
if ((e & 0xC0) != 0x080) {
_reportInvalidOther(e & 0xFF, _inputPtr);
}
return (c << 6) | (e & 0x3F);
}
// @return Character value <b>minus 0x10000</c>; this so that caller
// can readily expand it to actual surrogates
private final int _decodeUTF8_4(int c, int d, int e, int f) throws IOException
{
if ((d & 0xC0) != 0x080) {
_reportInvalidOther(d & 0xFF, _inputPtr);
}
c = ((c & 0x07) << 6) | (d & 0x3F);
if ((e & 0xC0) != 0x080) {
_reportInvalidOther(e & 0xFF, _inputPtr);
}
c = (c << 6) | (e & 0x3F);
if ((f & 0xC0) != 0x080) {
_reportInvalidOther(f & 0xFF, _inputPtr);
}
return ((c << 6) | (f & 0x3F)) - 0x10000;
}
/*
/**********************************************************************
/* Internal methods: second-level parsing: numbers, integral
/**********************************************************************
*/
private final JsonToken _startInt() throws IOException
{
// common case first: have all we need
if ((_inputPtr + 5) > _inputEnd) {
return _finishInt(0, 0);
}
int value = _decodeVInt();
_numberInt = SmileUtil.zigzagDecode(value);
_numTypesValid = NR_INT;
_numberType = NumberType.INT;
return _valueComplete(JsonToken.VALUE_NUMBER_INT);
}
private final JsonToken _finishInt(int value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
int b = _inputBuffer[_inputPtr++];
if (b < 0) { // got it all; these are last 6 bits
value = (value << 6) | (b & 0x3F);
_numberInt = SmileUtil.zigzagDecode(value);
_numTypesValid = NR_INT;
_numberType = NumberType.INT;
return _valueComplete(JsonToken.VALUE_NUMBER_INT);
}
// can't get too big; 5 bytes is max
if (++bytesRead >= 5 ) {
_reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes");
}
value = (value << 7) | b;
}
_minorState = MINOR_VALUE_NUMBER_INT;
_pending32 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
private final JsonToken _startLong() throws IOException
{
// common case first: have all we need
int ptr = _inputPtr;
final int maxEnd = ptr+11;
if (maxEnd >= _inputEnd) {
return _finishLong(0L, 0);
}
int i = _inputBuffer[ptr++]; // first 7 bits
i = (i << 7) + _inputBuffer[ptr++]; // 14 bits
i = (i << 7) + _inputBuffer[ptr++]; // 21
i = (i << 7) + _inputBuffer[ptr++];
long l = i;
while (true) {
int value = _inputBuffer[ptr++];
if (value < 0) {
l = (l << 6) + (value & 0x3F);
_inputPtr = ptr;
_numberLong = SmileUtil.zigzagDecode(l);
_numTypesValid = NR_LONG;
_numberType = NumberType.LONG;
return _valueComplete(JsonToken.VALUE_NUMBER_INT);
}
l = (l << 7) + value;
if (ptr >= maxEnd) {
_reportError("Corrupt input; 64-bit VInt extends beyond 11 data bytes");
}
}
}
private final JsonToken _finishLong(long value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
int b = _inputBuffer[_inputPtr++];
if (b < 0) { // got it all; these are last 6 bits
value = (value << 6) | (b & 0x3F);
_numberLong = SmileUtil.zigzagDecode(value);
_numTypesValid = NR_LONG;
_numberType = NumberType.LONG;
return _valueComplete(JsonToken.VALUE_NUMBER_INT);
}
// can't get too big; 5 bytes is max
if (++bytesRead >= 11) {
_reportError("Corrupt input; 64-bit VInt extends beyond 5 data bytes");
}
value = (value << 7) | b;
}
_minorState = MINOR_VALUE_NUMBER_LONG;
_pending64 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
private final JsonToken _startBigInt() throws IOException
{
_initByteArrayBuilder();
if ((_inputPtr + 5) > _inputEnd) {
return _finishBigIntLen(0, 0);
}
_pending32 = _decodeVInt();
_inputCopyLen = 0;
return _finishBigIntBody();
}
private final JsonToken _finishBigIntLen(int value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
int b = _inputBuffer[_inputPtr++];
if (b < 0) { // got it all; these are last 6 bits
_pending32 = (value << 6) | (b & 0x3F);
_inputCopyLen = 0;
return _finishBigIntBody();
}
// can't get too big; 5 bytes is max
if (++bytesRead >= 5 ) {
_reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes");
}
value = (value << 7) | b;
}
_minorState = MINOR_VALUE_NUMBER_BIGINT_LEN;
_pending32 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
private final JsonToken _finishBigIntBody() throws IOException
{
if (_decode7BitEncoded()) { // got it all!
final byte[] array = _byteArrayBuilder.toByteArray();
_streamReadConstraints.validateIntegerLength(array.length);
_numberBigInt = new BigInteger(array);
_numberType = NumberType.BIG_INTEGER;
_numTypesValid = NR_BIGINT;
return _valueComplete(JsonToken.VALUE_NUMBER_INT);
}
_minorState = MINOR_VALUE_NUMBER_BIGINT_BODY;
return _updateTokenToNA();
}
/*
/**********************************************************************
/* Internal methods: second-level parsing: numbers, floating-point
/**********************************************************************
*/
protected final JsonToken _startFloat() throws IOException
{
int ptr = _inputPtr;
if ((ptr + 5) > _inputEnd) {
return _finishFloat(0, 0);
}
// NOTE! all bytes guaranteed to be unsigned (should verify?)
int i = _fourBytesToInt(ptr);
ptr += 4;
i = (i << 7) + _inputBuffer[ptr++];
_inputPtr = ptr;
_numberFloat = (float) Float.intBitsToFloat(i);
_numTypesValid = NR_FLOAT;
_numberType = NumberType.FLOAT;
return _valueComplete(JsonToken.VALUE_NUMBER_FLOAT);
}
protected final JsonToken _finishFloat(int value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
value = (value << 7) + _inputBuffer[_inputPtr++];
if (++bytesRead == 5) {
_numberFloat = (float) Float.intBitsToFloat(value);
_numTypesValid = NR_FLOAT;
_numberType = NumberType.FLOAT;
return _valueComplete(JsonToken.VALUE_NUMBER_FLOAT);
}
}
_minorState = MINOR_VALUE_NUMBER_FLOAT;
_pending32 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
protected final JsonToken _startDouble() throws IOException
{
int ptr = _inputPtr;
if ((ptr + 10) > _inputEnd) {
return _finishDouble(0L, 0);
}
// NOTE! all bytes guaranteed to be unsigned (should verify?)
long hi = _fourBytesToInt(ptr);
ptr += 4;
long value = (hi << 28) + (long) _fourBytesToInt(ptr);
ptr += 4;
// and then remaining 2 bytes
value = (value << 7) + _inputBuffer[ptr++];
value = (value << 7) + _inputBuffer[ptr++];
_inputPtr = ptr;
_numberDouble = Double.longBitsToDouble(value);
_numTypesValid = NR_DOUBLE;
_numberType = NumberType.DOUBLE;
return _valueComplete(JsonToken.VALUE_NUMBER_FLOAT);
}
protected final JsonToken _finishDouble(long value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
value = (value << 7) + _inputBuffer[_inputPtr++];
if (++bytesRead == 10) {
_numberDouble = Double.longBitsToDouble(value);
_numTypesValid = NR_DOUBLE;
_numberType = NumberType.DOUBLE;
return _valueComplete(JsonToken.VALUE_NUMBER_FLOAT);
}
}
_minorState = MINOR_VALUE_NUMBER_DOUBLE;
_pending64 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
private final JsonToken _startBigDecimal() throws IOException
{
_initByteArrayBuilder();
if ((_inputPtr + 5) > _inputEnd) {
return _finishBigDecimalScale(0, 0);
}
// note! Scale stored here, need _pending32 for byte length
_pending64 = _decodeVInt();
return _finishBigDecimalLen(0, 0);
}
private final JsonToken _finishBigDecimalScale(int value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
int b = _inputBuffer[_inputPtr++];
if (b < 0) { // got it all; these are last 6 bits
value = (value << 6) | (b & 0x3F);
_pending64 = value;
return _finishBigDecimalLen(0, 0);
}
// can't get too big; 5 bytes is max
if (++bytesRead >= 5 ) {
_reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes");
}
value = (value << 7) | b;
}
_minorState = MINOR_VALUE_NUMBER_BIGDEC_SCALE;
// note! Scale stored here, need _pending32 for byte length
_pending64 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
private final JsonToken _finishBigDecimalLen(int value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
int b = _inputBuffer[_inputPtr++];
if (b < 0) { // got it all; these are last 6 bits
_pending32 = (value << 6) | (b & 0x3F);
_inputCopyLen = 0;
return _finishBigDecimalBody();
}
// can't get too big; 5 bytes is max
if (++bytesRead >= 5 ) {
_reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes");
}
value = (value << 7) | b;
}
_minorState = MINOR_VALUE_NUMBER_BIGDEC_LEN;
_pending32 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
private final JsonToken _finishBigDecimalBody() throws IOException
{
if (_decode7BitEncoded()) { // got it all!
// note: scale value is signed, needs zigzag, so:
final int scale = SmileUtil.zigzagDecode((int) _pending64);
final byte[] array = _byteArrayBuilder.toByteArray();
_streamReadConstraints.validateFPLength(array.length);
BigInteger bigInt = new BigInteger(array);
_numberBigDecimal = new BigDecimal(bigInt, scale);
_numberType = NumberType.BIG_DECIMAL;
_numTypesValid = NR_BIGDECIMAL;
return _valueComplete(JsonToken.VALUE_NUMBER_FLOAT);
}
_minorState = MINOR_VALUE_NUMBER_BIGDEC_BODY;
return _updateTokenToNA();
}
/*
/**********************************************************************
/* Internal methods: second-level parsing: Binary
/**********************************************************************
*/
protected final JsonToken _startRawBinary() throws IOException
{
if ((_inputPtr + 5) > _inputEnd) {
return _finishRawBinaryLen(0, 0);
}
final int len = _decodeVInt();
_binaryValue = new byte[len];
_pending32 = len;
_inputCopyLen = 0;
return _finishRawBinaryBody();
}
private final JsonToken _finishRawBinaryLen(int value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
int b = _inputBuffer[_inputPtr++];
if (b < 0) { // got it all; these are last 6 bits
final int len = (value << 6) | (b & 0x3F);
_binaryValue = new byte[len];
_pending32 = len;
_inputCopyLen = 0;
return _finishRawBinaryBody();
}
// can't get too big; 5 bytes is max
if (++bytesRead >= 5 ) {
_reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes");
}
value = (value << 7) | b;
}
_minorState = MINOR_VALUE_BINARY_RAW_LEN;
_pending32 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
private final JsonToken _finishRawBinaryBody() throws IOException
{
int totalLen = _pending32;
int offset = _inputCopyLen;
int needed = totalLen - offset;
int avail = _inputEnd - _inputPtr;
if (avail >= needed) {
System.arraycopy(_inputBuffer, _inputPtr, _binaryValue, offset, needed);
_inputPtr += needed;
return _valueComplete(JsonToken.VALUE_EMBEDDED_OBJECT);
}
if (avail > 0) {
System.arraycopy(_inputBuffer, _inputPtr, _binaryValue, offset, avail);
_inputPtr += avail;
}
_pending32 = totalLen;
_inputCopyLen = offset+avail;
_minorState = MINOR_VALUE_BINARY_RAW_BODY;
return _updateTokenToNA();
}
private final JsonToken _start7BitBinary() throws IOException
{
_initByteArrayBuilder();
if ((_inputPtr + 5) > _inputEnd) {
return _finish7BitBinaryLen(0, 0);
}
_pending32 = _decodeVInt();
_inputCopyLen = 0;
return _finish7BitBinaryBody();
}
private final JsonToken _finish7BitBinaryLen(int value, int bytesRead) throws IOException
{
while (_inputPtr < _inputEnd) {
int b = _inputBuffer[_inputPtr++];
if (b < 0) { // got it all; these are last 6 bits
_pending32 = (value << 6) | (b & 0x3F);
_inputCopyLen = 0;
return _finish7BitBinaryBody();
}
// can't get too big; 5 bytes is max
if (++bytesRead >= 5 ) {
_reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes");
}
value = (value << 7) | b;
}
_minorState = MINOR_VALUE_BINARY_7BIT_LEN;
_pending32 = value;
_inputCopyLen = bytesRead;
return _updateTokenToNA();
}
private final JsonToken _finish7BitBinaryBody() throws IOException
{
if (_decode7BitEncoded()) { // got it all!
_binaryValue = _byteArrayBuilder.toByteArray();
return _valueComplete(JsonToken.VALUE_EMBEDDED_OBJECT);
}
_minorState = MINOR_VALUE_BINARY_7BIT_BODY;
return _updateTokenToNA();
}
/*
/**********************************************************************
/* Shared text decoding methods
/**********************************************************************
*/
private final String _decodeASCIIText(byte[] inBuf, int inPtr, int len) throws IOException
{
// note: caller ensures we have enough bytes available
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
int outPtr = 0;
// loop unrolling seems to help here:
for (int inEnd = inPtr + len - 3; inPtr < inEnd; ) {
outBuf[outPtr++] = (char) inBuf[inPtr++];
outBuf[outPtr++] = (char) inBuf[inPtr++];
outBuf[outPtr++] = (char) inBuf[inPtr++];
outBuf[outPtr++] = (char) inBuf[inPtr++];
}
int left = (len & 3);
if (left > 0) {
outBuf[outPtr++] = (char) inBuf[inPtr++];
if (left > 1) {
outBuf[outPtr++] = (char) inBuf[inPtr++];
if (left > 2) {
outBuf[outPtr++] = (char) inBuf[inPtr++];
}
}
}
_textBuffer.setCurrentLength(len);
return _textBuffer.contentsAsString();
}
/**
* Helper method used to decode short Unicode string, length for which actual
* length (in bytes) is known
*
* @param len Length between 1 and 64
*/
private final String _decodeShortUnicodeText(byte[] inBuf, int inPtr, int len) throws IOException
{
// note: caller ensures we have enough bytes available
int outPtr = 0;
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
final int[] codes = SmileConstants.sUtf8UnitLengths;
for (int end = inPtr + len; inPtr < end; ) {
int i = inBuf[inPtr++] & 0xFF;
int code = codes[i];
if (code != 0) {
// trickiest one, need surrogate handling
switch (code) {
case 1:
i = ((i & 0x1F) << 6) | (inBuf[inPtr++] & 0x3F);
break;
case 2:
i = ((i & 0x0F) << 12)
| ((inBuf[inPtr++] & 0x3F) << 6)
| (inBuf[inPtr++] & 0x3F);
break;
case 3:
i = ((i & 0x07) << 18)
| ((inBuf[inPtr++] & 0x3F) << 12)
| ((inBuf[inPtr++] & 0x3F) << 6)
| (inBuf[inPtr++] & 0x3F);
// note: this is the codepoint value; need to split, too
i -= 0x10000;
outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
i = 0xDC00 | (i & 0x3FF);
break;
default: // invalid
_reportError("Invalid byte 0x%02x in short Unicode text block (offset %d)", i & 0xFF, inPtr);
}
}
outBuf[outPtr++] = (char) i;
}
_textBuffer.setCurrentLength(outPtr);
return _textBuffer.contentsAsString();
}
private final String _decodeLongUnicodeName(byte[] inBuf, int inPtr, int len) throws IOException
{
// note: caller ensures we have enough bytes available
int outPtr = 0;
char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
// 26-Jan-2024, tatu: Must have enough space for all-ASCII, at least:
if (outBuf.length < (len + 8)) {
outBuf = _textBuffer.expandCurrentSegment(len + 8);
}
final int[] codes = SmileConstants.sUtf8UnitLengths;
// since we only check expansion for multi-byte chars, there must be
// enough room for remaining bytes as all-ASCII
int estSlack = outBuf.length - len - 8;
for (int end = inPtr + len; inPtr < end; ) {
int i = inBuf[inPtr++] & 0xFF;
int code = codes[i];
if (code != 0) {
// trickiest one, need surrogate handling
switch (code) {
case 1:
i = ((i & 0x1F) << 6) | (inBuf[inPtr++] & 0x3F);
break;
case 2:
i = ((i & 0x0F) << 12)
| ((inBuf[inPtr++] & 0x3F) << 6)
| (inBuf[inPtr++] & 0x3F);
break;
case 3:
i = ((i & 0x07) << 18)
| ((inBuf[inPtr++] & 0x3F) << 12)
| ((inBuf[inPtr++] & 0x3F) << 6)
| (inBuf[inPtr++] & 0x3F);
// note: this is the codepoint value; need to split, too
i -= 0x10000;
outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
i = 0xDC00 | (i & 0x3FF);
break;
default: // invalid
_reportError("Invalid byte 0x%02x in short Unicode text block (offset %d)", i & 0xFF, inPtr);
}
estSlack -= code;
if (estSlack <= 0) {
outBuf = _textBuffer.expandCurrentSegment();
// and re-adjust: most likely we are now safe but...
estSlack = (outBuf.length - outPtr) - (end - inPtr) - 8;
}
}
outBuf[outPtr++] = (char) i;
}
_textBuffer.setCurrentLength(outPtr);
return _textBuffer.contentsAsString();
}
/*
/**********************************************************************
/* Low-level decoding of building blocks (vints, 7-bit encoded blocks)
/**********************************************************************
*/
private final int _fourBytesToInt(int ptr) throws IOException
{
int i = _inputBuffer[ptr++]; // first 7 bits
i = (i << 7) + _inputBuffer[ptr++]; // 14 bits
i = (i << 7) + _inputBuffer[ptr++]; // 21
i = (i << 7) + _inputBuffer[ptr++];
return i;
}
private final int _decodeVInt() throws IOException
{
int ptr = _inputPtr;
int value = _inputBuffer[ptr++];
if (value < 0) { // 6 bits
_inputPtr = ptr;
return value & 0x3F;
}
int i = _inputBuffer[ptr++];
if (i >= 0) { // 13 bits
value = (value << 7) + i;
i = _inputBuffer[ptr++];
if (i >= 0) {
value = (value << 7) + i;
i = _inputBuffer[ptr++];
if (i >= 0) {
value = (value << 7) + i;
// and then we must get negative
i = _inputBuffer[ptr++];
if (i >= 0) {
_reportError("Corrupt input; 32-bit VInt extends beyond 5 data bytes");
}
}
}
}
_inputPtr = ptr;
return (value << 6) + (i & 0x3F);
}
private final boolean _decode7BitEncoded() throws IOException
{
int bytesToDecode = _pending32;
int buffered = _inputCopyLen;
int ptr = _inputPtr;
int avail = _inputEnd - ptr;
// Leftovers from past round?
if (buffered > 0) {
// but offline case of incomplete last block
if (bytesToDecode < 7) {
return _decode7BitEncodedTail(bytesToDecode, buffered);
}
int needed = 8 - buffered;
if (avail < needed) { // not enough to decode, just copy
System.arraycopy(_inputBuffer, ptr, _inputCopy, buffered, avail);
_inputPtr = ptr+avail;
_inputCopyLen = buffered + avail;
_pending32 = bytesToDecode;
return false;
}
_inputCopyLen = 0;
// yes, got full 8 byte chunk
final byte[] copy = _inputCopy;
System.arraycopy(_inputBuffer, ptr, copy, buffered, needed);
int i1 = (copy[0] << 25) + (copy[1] << 18)
+ (copy[2] << 11) + (copy[3] << 4);
int x = copy[4];
i1 += x >> 3;
_byteArrayBuilder.appendFourBytes(i1);
i1 = ((x & 0x7) << 21) + (copy[5] << 14)
+ (copy[6] << 7) + copy[7];
_byteArrayBuilder.appendThreeBytes(i1);
ptr += needed;
bytesToDecode -= 7;
avail = _inputEnd - ptr;
}
final byte[] input = _inputBuffer;
// And then all full 8-to-7-byte chunks
while (bytesToDecode > 6) {
if (avail < 8) { // full blocks missing, quit
if (avail > 0) {
System.arraycopy(_inputBuffer, ptr, _inputCopy, 0, avail);
ptr += avail;
_inputCopyLen = avail;
}
_pending32 = bytesToDecode;
_inputPtr = ptr;
return false;
}
int i1 = (input[ptr++] << 25)
+ (input[ptr++] << 18)
+ (input[ptr++] << 11)
+ (input[ptr++] << 4);
int x = input[ptr++];
i1 += x >> 3;
_byteArrayBuilder.appendFourBytes(i1);
i1 = ((x & 0x7) << 21)
+ (input[ptr++] << 14)
+ (input[ptr++] << 7)
+ input[ptr++];
_byteArrayBuilder.appendThreeBytes(i1);
bytesToDecode -= 7;
avail -= 8;
}
_inputPtr = ptr;
// and finally, tail?
if (bytesToDecode > 0) {
if (avail == 0) {
_pending32 = bytesToDecode;
_inputCopyLen = 0;
return false;
}
return _decode7BitEncodedTail(bytesToDecode, 0);
}
return true;
}
protected final boolean _decode7BitEncodedTail(int bytesToDecode, int buffered) throws IOException
{
if (bytesToDecode == 0) {
return true;
}
int avail = _inputEnd - _inputPtr;
int needed = bytesToDecode + 1 - buffered;
if (avail < needed) {
System.arraycopy(_inputBuffer, _inputPtr, _inputCopy, buffered, avail);
_inputPtr += avail;
_inputCopyLen = buffered + avail;
_pending32 = bytesToDecode;
return false;
}
System.arraycopy(_inputBuffer, _inputPtr, _inputCopy, buffered, needed);
_inputPtr += needed;
// Handling of full tail is bit different...
int value = _inputCopy[0];
for (int i = 1; i < bytesToDecode; ++i) {
value = (value << 7) + _inputCopy[i];
_byteArrayBuilder.append(value >> (7 - i));
}
// last byte is different, has remaining 1 - 6 bits, right-aligned
value <<= bytesToDecode;
_byteArrayBuilder.append(value + _inputCopy[bytesToDecode]);
_inputCopyLen = 0;
return true;
}
}