ParserKeywordsUtilsTest.java
/*-
* #%L
* JSQLParser library
* %%
* Copyright (C) 2004 - 2022 JSQLParser
* %%
* Dual licensed under GNU LGPL 2.1 or Apache License 2.0
* #L%
*/
package net.sf.jsqlparser.parser;
import org.javacc.jjtree.JJTree;
import org.javacc.parser.Context;
import org.javacc.parser.JavaCCParser;
import org.javacc.parser.RCharacterList;
import org.javacc.parser.RChoice;
import org.javacc.parser.RJustName;
import org.javacc.parser.ROneOrMore;
import org.javacc.parser.RSequence;
import org.javacc.parser.RStringLiteral;
import org.javacc.parser.RZeroOrMore;
import org.javacc.parser.RZeroOrOne;
import org.javacc.parser.RegularExpression;
import org.javacc.parser.Semanticize;
import org.javacc.parser.Token;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.IOException;
import java.io.InvalidClassException;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Logger;
class ParserKeywordsUtilsTest {
public final static CharsetEncoder CHARSET_ENCODER = StandardCharsets.US_ASCII.newEncoder();
final static File FILE = new File("src/main/jjtree/net/sf/jsqlparser/parser/JSqlParserCC.jjt");
final static Logger LOGGER = Logger.getLogger(ParserKeywordsUtilsTest.class.getName());
private static void addTokenImage(TreeSet<String> allKeywords, RStringLiteral literal) {
if (CHARSET_ENCODER.canEncode(literal.image) && literal.image.matches("\\w+")) {
allKeywords.add(literal.image);
}
}
@SuppressWarnings({"PMD.EmptyIfStmt", "PMD.CyclomaticComplexity"})
private static void addTokenImage(TreeSet<String> allKeywords, Object o) throws Exception {
if (o instanceof RStringLiteral) {
RStringLiteral literal = (RStringLiteral) o;
addTokenImage(allKeywords, literal);
} else if (o instanceof RChoice) {
RChoice choice = (RChoice) o;
addTokenImage(allKeywords, choice);
} else if (o instanceof RSequence) {
RSequence sequence1 = (RSequence) o;
addTokenImage(allKeywords, sequence1);
} else if (o instanceof ROneOrMore) {
ROneOrMore oneOrMore = (ROneOrMore) o;
addTokenImage(allKeywords, oneOrMore);
} else if (o instanceof RZeroOrMore) {
RZeroOrMore zeroOrMore = (RZeroOrMore) o;
addTokenImage(allKeywords, zeroOrMore);
} else if (o instanceof RZeroOrOne) {
RZeroOrOne zeroOrOne = (RZeroOrOne) o;
addTokenImage(allKeywords, zeroOrOne);
} else if (o instanceof RJustName) {
RJustName zeroOrOne = (RJustName) o;
addTokenImage(allKeywords, zeroOrOne);
} else if (o instanceof RCharacterList) {
// do nothing, we are not interested in those
} else {
throw new InvalidClassException(
"Unknown Type: " + o.getClass().getName() + " " + o.toString());
}
}
private static void addTokenImage(TreeSet<String> allKeywords, RSequence sequence)
throws Exception {
for (Object o : sequence.units) {
addTokenImage(allKeywords, o);
}
}
private static void addTokenImage(TreeSet<String> allKeywords, ROneOrMore oneOrMore) {
for (Token token : oneOrMore.lhsTokens) {
if (CHARSET_ENCODER.canEncode(token.image)) {
allKeywords.add(token.image);
}
}
}
private static void addTokenImage(TreeSet<String> allKeywords, RZeroOrMore oneOrMore) {
for (Token token : oneOrMore.lhsTokens) {
if (CHARSET_ENCODER.canEncode(token.image)) {
allKeywords.add(token.image);
}
}
}
private static void addTokenImage(TreeSet<String> allKeywords, RZeroOrOne oneOrMore) {
for (Token token : oneOrMore.lhsTokens) {
if (CHARSET_ENCODER.canEncode(token.image)) {
allKeywords.add(token.image);
}
}
}
private static void addTokenImage(TreeSet<String> allKeywords, RJustName oneOrMore) {
for (Token token : oneOrMore.lhsTokens) {
if (CHARSET_ENCODER.canEncode(token.image)) {
allKeywords.add(token.image);
}
}
}
private static void addTokenImage(TreeSet<String> allKeywords, RChoice choice)
throws Exception {
for (Object o : choice.getChoices()) {
addTokenImage(allKeywords, o);
}
}
public static TreeSet<String> getAllKeywordsUsingJavaCC(File file) throws Exception {
TreeSet<String> allKeywords = new TreeSet<>();
Path jjtGrammar = file.toPath();
Path jjGrammarOutputDir = Files.createTempDirectory("jjgrammer");
new JJTree().main(new String[] {
"-JJTREE_OUTPUT_DIRECTORY=" + jjGrammarOutputDir.toString(),
"-CODE_GENERATOR=java",
jjtGrammar.toString()
});
Path jjGrammarFile = jjGrammarOutputDir.resolve("JSqlParserCC.jj");
Context context = new Context();
JavaCCParser parser = new JavaCCParser(new java.io.FileInputStream(jjGrammarFile.toFile()));
parser.javacc_input(context);
// needed for filling JavaCCGlobals
// JavaCCErrors.reInit();
Semanticize.start(context);
// read all the Token and get the String image
for (Map.Entry<Integer, RegularExpression> item : context.globals().rexps_of_tokens
.entrySet()) {
addTokenImage(allKeywords, item.getValue());
}
// clean up
if (jjGrammarOutputDir.toFile().exists()) {
jjGrammarOutputDir.toFile().delete();
}
return allKeywords;
}
@Test
void getAllKeywords() throws IOException {
Set<String> allKeywords = ParserKeywordsUtils.getAllKeywordsUsingRegex(FILE);
Assertions.assertFalse(allKeywords.isEmpty(), "Keyword List must not be empty!");
}
@Test
void getAllKeywordsUsingJavaCC() throws Exception {
Set<String> allKeywords = getAllKeywordsUsingJavaCC(FILE);
Assertions.assertFalse(allKeywords.isEmpty(), "Keyword List must not be empty!");
}
// Test, if all Tokens found per RegEx are also found from the JavaCCParser
@Test
void compareKeywordLists() throws Exception {
Set<String> allRegexKeywords = ParserKeywordsUtils.getAllKeywordsUsingRegex(FILE);
Set<String> allJavaCCParserKeywords = getAllKeywordsUsingJavaCC(FILE);
// Exceptions, which should not have been found from the RegEx
List<String> exceptions = Arrays.asList("0x");
// We expect all Keywords from the Regex to be found by the JavaCC Parser
for (String s : allRegexKeywords) {
Assertions.assertTrue(
exceptions.contains(s) || allJavaCCParserKeywords.contains(s),
"The Keywords from JavaCC do not contain Keyword: " + s);
}
// The JavaCC Parser finds some more valid Keywords (where no explicit Token has been
// defined
for (String s : allJavaCCParserKeywords) {
if (!(exceptions.contains(s) || allRegexKeywords.contains(s))) {
LOGGER.fine("Found Additional Keywords from Parser: " + s);
}
}
}
}