// Tokenizer.java
/*
* Copyright 2017-2022 original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.micronaut.expressions.parser.token;
import io.micronaut.core.annotation.Internal;
import io.micronaut.core.annotation.Nullable;
import io.micronaut.core.util.CollectionUtils;
import io.micronaut.expressions.parser.exception.ExpressionParsingException;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static io.micronaut.expressions.parser.token.TokenType.*;
/**
 * Tokenizer for parsing evaluated expressions.
 *
 * <p>The tokenizer walks the expression from left to right. At each position it tries the
 * known token patterns in declaration order and emits the first one that matches exactly at
 * the current position. Whitespace tokens are consumed but never returned to the caller.</p>
 *
 * <p>Instances keep a mutable cursor, so a single instance tokenizes a single expression
 * once.</p>
 *
 * @author Sergey Gavrilov
 * @since 4.0.0
 */
@Internal
public final class Tokenizer {

    // Pattern declaration order is significant: longer/more specific patterns must be tried
    // before their prefixes (e.g. "?." and "?:" before "?", FLOAT/DOUBLE/LONG before INT,
    // keywords before IDENTIFIER).
    // NOTE(review): this relies on CollectionUtils.mapOf returning a map that iterates in
    // insertion order - confirm against its documentation.
    private static final Map<String, TokenType> TOKENS = CollectionUtils.mapOf(
        // WHITESPACES
        "^\\s+", WHITESPACE,

        // BRACES
        "^\\{", L_CURLY,
        "^}", R_CURLY,
        "^\\[", L_SQUARE,
        "^]", R_SQUARE,
        "^\\(", L_PAREN,
        "^\\)", R_PAREN,

        // KEYWORDS
        "^instanceof\\b", INSTANCEOF,
        "^matches\\b", MATCHES,
        "^empty\\b", EMPTY,
        "^ctx\\b", BEAN_CONTEXT,
        "^env\\b", ENVIRONMENT,
        "^this\\b", THIS,

        // LITERALS
        "^null\\b", NULL, // NULL
        "^(true|false)\\b", BOOL, // BOOLEAN
        "^'[^']*'", STRING, // STRING

        // FLOAT
        "^\\d+\\.\\d*((e|E)(\\+|-)?\\d+)?(f|F)", FLOAT,
        "^\\.\\d+((e|E)(\\+|-)?\\d+)?(f|F)", FLOAT,
        "^\\d+((e|E)(\\+|-)?\\d+)?(f|F)", FLOAT,

        // DOUBLE
        "^\\d+\\.\\d*((e|E)(\\+|-)?\\d+)?(d|D)?", DOUBLE,
        "^\\.\\d+((e|E)(\\+|-)?\\d+)?(d|D)?", DOUBLE,
        "^\\d+((e|E)(\\+|-)?\\d+)(d|D)?", DOUBLE,
        "^\\d+((e|E)(\\+|-)?\\d+)?(d|D)", DOUBLE,

        // LONG
        "^0(x|X)[0-9a-fA-F]+(l|L)", LONG,
        "^\\d+(l|L)", LONG,

        // INT
        "^0(x|X)[0-9a-fA-F]+", INT,
        "^\\d+", INT,

        // SYMBOLS
        "^#", EXPRESSION_CONTEXT_REF,
        "^\\?\\.", SAFE_NAV,
        "^\\?\\:", ELVIS,
        "^\\?", QMARK,
        "^\\.", DOT,
        "^,", COMMA,
        "^\\:", COLON,

        // RELATIONAL OPERATORS
        "^==", EQ,
        "^!=", NE,
        "^>=", GTE,
        "^>", GT,
        "^<=", LTE,
        "^<", LT,

        // LOGICAL OPERATORS
        "^!", NOT,
        "^not\\b", NOT,
        "^&&", AND,
        "^and\\b", AND,
        "^\\|\\|", OR,
        "^or\\b", OR,

        // MATH OPERATORS
        "^\\+\\+", INCREMENT,
        "^\\+", PLUS,
        "^\\-\\-", DECREMENT,
        "^\\-", MINUS,
        "^\\*", MUL,
        "^/", DIV,
        "^div\\b", DIV,
        "^%", MOD,
        "^mod\\b", MOD,
        "^\\^", POW,

        // IDENTIFIERS
        "^T\\(", TYPE_IDENTIFIER,
        // Anchored like every other pattern: an unanchored "\\w+" would match an identifier
        // further along the input and silently skip the unrecognised characters before it.
        "^\\w+", IDENTIFIER);

    // Patterns are compiled once and shared by all tokenizer instances.
    private static final List<TokenPattern> PATTERNS =
        TOKENS.entrySet()
            .stream()
            .map(entry -> TokenPattern.of(entry.getKey(), entry.getValue()))
            .toList();

    private final int length;
    private final String expression;

    // Index in 'expression' of the first character that has not been consumed yet.
    private int cursor;

    /**
     * Creates a tokenizer positioned at the start of the given expression.
     *
     * @param expression the expression text to tokenize; must not be {@code null}
     */
    public Tokenizer(String expression) {
        this.expression = expression;
        this.cursor = 0;
        this.length = expression.length();
    }

    /**
     * Consumes and returns the next non-whitespace token of the expression.
     *
     * @return the next token, or {@code null} once the whole expression has been consumed
     *         (including the case where only whitespace was left)
     * @throws ExpressionParsingException if no token pattern matches at the current position
     */
    @Nullable
    public Token getNextToken() {
        while (hasMoreTokens()) {
            String remaining = expression.substring(cursor);
            Token token = matchFirst(remaining);
            if (token == null) {
                throw new ExpressionParsingException("Unexpected token: " + remaining);
            }

            cursor += token.value().length();
            if (token.type() != WHITESPACE) {
                return token;
            }
            // Whitespace is consumed but not reported; keep scanning from the new cursor.
        }
        return null;
    }

    /**
     * Tries every known pattern, in declaration order, against the start of the given input.
     *
     * @param input the not yet consumed part of the expression
     * @return the token produced by the first matching pattern, or {@code null} if none match
     */
    @Nullable
    private static Token matchFirst(String input) {
        for (TokenPattern pattern : PATTERNS) {
            Token token = pattern.matches(input);
            if (token != null) {
                return token;
            }
        }
        return null;
    }

    private boolean hasMoreTokens() {
        return cursor < length;
    }

    /**
     * A compiled token regular expression paired with the token type it produces.
     *
     * @param pattern   the compiled regular expression
     * @param tokenType the type assigned to tokens produced by this pattern
     */
    private record TokenPattern(Pattern pattern, TokenType tokenType) {

        public static TokenPattern of(String pattern, TokenType tokenType) {
            return new TokenPattern(Pattern.compile(pattern), tokenType);
        }

        /**
         * Matches this pattern against the beginning of the given value.
         *
         * @param value the input to match
         * @return a token holding the matched prefix, or {@code null} if the pattern does not
         *         match at the very start of the input
         */
        @Nullable
        public Token matches(String value) {
            Matcher matcher = pattern.matcher(value);
            // lookingAt() only accepts a match starting at index 0, so a token can never be
            // taken from the middle of the input even if a pattern lacks a leading '^'.
            if (!matcher.lookingAt()) {
                return null;
            }
            return new Token(tokenType, matcher.group());
        }
    }
}