JsonScanner.java
/*
* Copyright 2008-present the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.data.mongodb.util.json;
import org.bson.BsonRegularExpression;
import org.bson.json.JsonParseException;
import org.jspecify.annotations.NullUnmarked;
/**
* Parses the string representation of a JSON object into a set of {@link JsonToken}-derived objects. <br />
* JsonScanner implementation borrowed from <a href=
* "https://github.com/mongodb/mongo-java-driver/blob/master/bson/src/main/org/bson/json/JsonScanner.java">MongoDB
* Inc.</a> licensed under the Apache License, Version 2.0. <br />
* Formatted and modified to allow reading Spring Data specific placeholder values.
*
* @author Jeff Yemin
* @author Trisha Gee
* @author Robert Guo
* @author Ross Lawley
* @author Christoph Strobl
* @since 2.2
*/
@NullUnmarked
class JsonScanner {
private final JsonBuffer buffer;
JsonScanner(final String json) {
this(new JsonBuffer(json));
}
JsonScanner(final JsonBuffer buffer) {
this.buffer = buffer;
}
/**
* @param newPosition the new position of the cursor position in the buffer
*/
public void setBufferPosition(final int newPosition) {
buffer.setPosition(newPosition);
}
/**
* @return the current location of the cursor in the buffer
*/
public int getBufferPosition() {
return buffer.getPosition();
}
/**
* Finds and returns the next complete token from this scanner. If scanner reached the end of the source, it will
* return a token with {@code JSONTokenType.END_OF_FILE} type.
*
* @return The next token.
* @throws JsonParseException if source is invalid.
*/
public JsonToken nextToken() {
int c = buffer.read();
while (c != -1 && Character.isWhitespace(c)) {
c = buffer.read();
}
if (c == -1) {
return new JsonToken(JsonTokenType.END_OF_FILE, "<eof>");
}
switch (c) {
case '{':
return new JsonToken(JsonTokenType.BEGIN_OBJECT, "{");
case '}':
return new JsonToken(JsonTokenType.END_OBJECT, "}");
case '[':
return new JsonToken(JsonTokenType.BEGIN_ARRAY, "[");
case ']':
return new JsonToken(JsonTokenType.END_ARRAY, "]");
case '(':
return new JsonToken(JsonTokenType.LEFT_PAREN, "(");
case ')':
return new JsonToken(JsonTokenType.RIGHT_PAREN, ")");
case ':':
c = buffer.read();
buffer.unread(c);
if (c == '#') { // for binding the SQL style ':#{#firstname}"'
return scanBindString();
}
return new JsonToken(JsonTokenType.COLON, ":");
case ',':
return new JsonToken(JsonTokenType.COMMA, ",");
case '\'':
case '"':
return scanString((char) c);
case '/':
return scanRegularExpression();
default:
if (c == '-' || Character.isDigit(c)) {
return scanNumber((char) c);
} else if (c == '$' || c == '_' || Character.isLetter(c)) {
return scanUnquotedString();
} else if (c == '?') { // for binding parameters. Both simple and SpEL ones.
return scanBindString();
} else {
int position = buffer.getPosition();
buffer.unread(c);
throw new JsonParseException("Invalid JSON input; Position: %d; Character: '%c'.", position, c);
}
}
}
/**
* Reads {@code RegularExpressionToken} from source. The following variants of lexemes are possible:
*
* <pre>
* /pattern/
* /\(pattern\)/
* /pattern/ims
* </pre>
*
* Options can include 'i','m','x','s'
*
* @return The regular expression token.
* @throws JsonParseException if regular expression representation is not valid.
*/
private JsonToken scanRegularExpression() {
int start = buffer.getPosition() - 1;
int options = -1;
RegularExpressionState state = RegularExpressionState.IN_PATTERN;
while (true) {
int c = buffer.read();
switch (state) {
case IN_PATTERN:
switch (c) {
case -1:
state = RegularExpressionState.INVALID;
break;
case '/':
state = RegularExpressionState.IN_OPTIONS;
options = buffer.getPosition();
break;
case '\\':
state = RegularExpressionState.IN_ESCAPE_SEQUENCE;
break;
default:
state = RegularExpressionState.IN_PATTERN;
break;
}
break;
case IN_ESCAPE_SEQUENCE:
state = RegularExpressionState.IN_PATTERN;
break;
case IN_OPTIONS:
switch (c) {
case 'i':
case 'm':
case 'x':
case 's':
state = RegularExpressionState.IN_OPTIONS;
break;
case ',':
case '}':
case ']':
case ')':
case -1:
state = RegularExpressionState.DONE;
break;
default:
if (Character.isWhitespace(c)) {
state = RegularExpressionState.DONE;
} else {
state = RegularExpressionState.INVALID;
}
break;
}
break;
default:
break;
}
switch (state) {
case DONE:
buffer.unread(c);
int end = buffer.getPosition();
BsonRegularExpression regex = new BsonRegularExpression(buffer.substring(start + 1, options - 1),
buffer.substring(options, end));
return new JsonToken(JsonTokenType.REGULAR_EXPRESSION, regex);
case INVALID:
throw new JsonParseException("Invalid JSON regular expression; Position: %d.", buffer.getPosition());
default:
}
}
}
/**
* Reads {@code StringToken} from source.
*
* @return The string token.
*/
private JsonToken scanBindString() {
int start = buffer.getPosition() - 1;
int c = buffer.read();
int charCount = 0;
boolean isExpression = false;
int parenthesisCount = 0;
while (c == '$' || c == '_' || Character.isLetterOrDigit(c) || c == '#' || c == '{' || c == '['
|| (isExpression && isExpressionAllowedChar(c))) {
if (charCount == 0 && (c == '#' || c == '$')) {
isExpression = true;
} else if (isExpression) {
if (c == '{') {
parenthesisCount++;
} else if (c == '}') {
parenthesisCount--;
if (parenthesisCount == 0) {
c = buffer.read();
break;
}
}
}
charCount++;
c = buffer.read();
}
buffer.unread(c);
String lexeme = buffer.substring(start, buffer.getPosition());
return new JsonToken(JsonTokenType.UNQUOTED_STRING, lexeme);
}
private static boolean isExpressionAllowedChar(int c) {
return (c == '+' || //
c == '-' || //
c == ':' || //
c == '.' || //
c == ',' || //
c == '*' || //
c == '/' || //
c == '%' || //
c == '(' || //
c == ')' || //
c == '[' || //
c == ']' || //
c == '#' || //
c == '{' || //
c == '}' || //
c == '@' || //
c == '^' || //
c == '!' || //
c == '=' || //
c == '&' || //
c == '|' || //
c == '?' || //
c == '$' || //
c == '>' || //
c == '<' || //
c == '"' || //
c == '\'' || //
c == ' ');
}
/**
* Reads {@code StringToken} from source.
*
* @return The string token.
*/
private JsonToken scanUnquotedString() {
int start = buffer.getPosition() - 1;
int c = buffer.read();
while (c == '$' || c == '_' || Character.isLetterOrDigit(c)) {
c = buffer.read();
}
buffer.unread(c);
String lexeme = buffer.substring(start, buffer.getPosition());
return new JsonToken(JsonTokenType.UNQUOTED_STRING, lexeme);
}
/**
* Reads number token from source. The following variants of lexemes are possible:
*
* <pre>
* 12
* 123
* -0
* -345
* -0.0
* 0e1
* 0e-1
* -0e-1
* 1e12
* -Infinity
* </pre>
*
* @return The number token.
* @throws JsonParseException if number representation is invalid.
*/
// CHECKSTYLE:OFF
private JsonToken scanNumber(final char firstChar) {
int c = firstChar;
int start = buffer.getPosition() - 1;
NumberState state;
switch (c) {
case '-':
state = NumberState.SAW_LEADING_MINUS;
break;
case '0':
state = NumberState.SAW_LEADING_ZERO;
break;
default:
state = NumberState.SAW_INTEGER_DIGITS;
break;
}
JsonTokenType type = JsonTokenType.INT64;
while (true) {
c = buffer.read();
switch (state) {
case SAW_LEADING_MINUS:
switch (c) {
case '0':
state = NumberState.SAW_LEADING_ZERO;
break;
case 'I':
state = NumberState.SAW_MINUS_I;
break;
default:
if (Character.isDigit(c)) {
state = NumberState.SAW_INTEGER_DIGITS;
} else {
state = NumberState.INVALID;
}
break;
}
break;
case SAW_LEADING_ZERO:
switch (c) {
case '.':
state = NumberState.SAW_DECIMAL_POINT;
break;
case 'e':
case 'E':
state = NumberState.SAW_EXPONENT_LETTER;
break;
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.DONE;
break;
default:
if (Character.isDigit(c)) {
state = NumberState.SAW_INTEGER_DIGITS;
} else if (Character.isWhitespace(c)) {
state = NumberState.DONE;
} else {
state = NumberState.INVALID;
}
break;
}
break;
case SAW_INTEGER_DIGITS:
switch (c) {
case '.':
state = NumberState.SAW_DECIMAL_POINT;
break;
case 'e':
case 'E':
state = NumberState.SAW_EXPONENT_LETTER;
break;
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.DONE;
break;
default:
if (Character.isDigit(c)) {
state = NumberState.SAW_INTEGER_DIGITS;
} else if (Character.isWhitespace(c)) {
state = NumberState.DONE;
} else {
state = NumberState.INVALID;
}
break;
}
break;
case SAW_DECIMAL_POINT:
type = JsonTokenType.DOUBLE;
if (Character.isDigit(c)) {
state = NumberState.SAW_FRACTION_DIGITS;
} else {
state = NumberState.INVALID;
}
break;
case SAW_FRACTION_DIGITS:
switch (c) {
case 'e':
case 'E':
state = NumberState.SAW_EXPONENT_LETTER;
break;
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.DONE;
break;
default:
if (Character.isDigit(c)) {
state = NumberState.SAW_FRACTION_DIGITS;
} else if (Character.isWhitespace(c)) {
state = NumberState.DONE;
} else {
state = NumberState.INVALID;
}
break;
}
break;
case SAW_EXPONENT_LETTER:
type = JsonTokenType.DOUBLE;
switch (c) {
case '+':
case '-':
state = NumberState.SAW_EXPONENT_SIGN;
break;
default:
if (Character.isDigit(c)) {
state = NumberState.SAW_EXPONENT_DIGITS;
} else {
state = NumberState.INVALID;
}
break;
}
break;
case SAW_EXPONENT_SIGN:
if (Character.isDigit(c)) {
state = NumberState.SAW_EXPONENT_DIGITS;
} else {
state = NumberState.INVALID;
}
break;
case SAW_EXPONENT_DIGITS:
switch (c) {
case ',':
case '}':
case ']':
case ')':
state = NumberState.DONE;
break;
default:
if (Character.isDigit(c)) {
state = NumberState.SAW_EXPONENT_DIGITS;
} else if (Character.isWhitespace(c)) {
state = NumberState.DONE;
} else {
state = NumberState.INVALID;
}
break;
}
break;
case SAW_MINUS_I:
boolean sawMinusInfinity = true;
char[] nfinity = new char[] { 'n', 'f', 'i', 'n', 'i', 't', 'y' };
for (int i = 0; i < nfinity.length; i++) {
if (c != nfinity[i]) {
sawMinusInfinity = false;
break;
}
c = buffer.read();
}
if (sawMinusInfinity) {
type = JsonTokenType.DOUBLE;
switch (c) {
case ',':
case '}':
case ']':
case ')':
case -1:
state = NumberState.DONE;
break;
default:
if (Character.isWhitespace(c)) {
state = NumberState.DONE;
} else {
state = NumberState.INVALID;
}
break;
}
} else {
state = NumberState.INVALID;
}
break;
default:
}
switch (state) {
case INVALID:
throw new JsonParseException("Invalid JSON number");
case DONE:
buffer.unread(c);
String lexeme = buffer.substring(start, buffer.getPosition());
if (type == JsonTokenType.DOUBLE) {
return new JsonToken(JsonTokenType.DOUBLE, Double.parseDouble(lexeme));
} else {
long value = Long.parseLong(lexeme);
if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
return new JsonToken(JsonTokenType.INT64, value);
} else {
return new JsonToken(JsonTokenType.INT32, (int) value);
}
}
default:
}
}
}
// CHECKSTYLE:ON
/**
* Reads {@code StringToken} from source.
*
* @return The string token.
*/
// CHECKSTYLE:OFF
private JsonToken scanString(final char quoteCharacter) {
StringBuilder sb = new StringBuilder();
while (true) {
int c = buffer.read();
switch (c) {
case '\\':
c = buffer.read();
switch (c) {
case '\'':
sb.append('\'');
break;
case '"':
sb.append('"');
break;
case '\\':
sb.append('\\');
break;
case '/':
sb.append('/');
break;
case 'b':
sb.append('\b');
break;
case 'f':
sb.append('\f');
break;
case 'n':
sb.append('\n');
break;
case 'r':
sb.append('\r');
break;
case 't':
sb.append('\t');
break;
case 'u':
int u1 = buffer.read();
int u2 = buffer.read();
int u3 = buffer.read();
int u4 = buffer.read();
if (u4 != -1) {
String hex = new String(new char[] { (char) u1, (char) u2, (char) u3, (char) u4 });
sb.append((char) Integer.parseInt(hex, 16));
}
break;
default:
throw new JsonParseException("Invalid escape sequence in JSON string '\\%c'.", c);
}
break;
default:
if (c == quoteCharacter) {
return new JsonToken(JsonTokenType.STRING, sb.toString());
}
if (c != -1) {
sb.append((char) c);
}
}
if (c == -1) {
throw new JsonParseException("End of file in JSON string.");
}
}
}
private enum NumberState {
SAW_LEADING_MINUS, SAW_LEADING_ZERO, SAW_INTEGER_DIGITS, SAW_DECIMAL_POINT, SAW_FRACTION_DIGITS, SAW_EXPONENT_LETTER, SAW_EXPONENT_SIGN, SAW_EXPONENT_DIGITS, SAW_MINUS_I, DONE, INVALID
}
private enum RegularExpressionState {
IN_PATTERN, IN_ESCAPE_SEQUENCE, IN_OPTIONS, DONE, INVALID
}
}