// ParserKeywordsUtils.java
/*-
* #%L
* JSQLParser library
* %%
* Copyright (C) 2004 - 2021 JSQLParser
* %%
* Dual licensed under GNU LGPL 2.1 or Apache License 2.0
* #L%
*/
package net.sf.jsqlparser.parser;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utilities for querying the parser's reserved and non-reserved keyword sets.
 *
 * <p>
 * <b>Non-reserved keywords</b> are derived from the generated {@link CCJSqlParserConstants} token
 * table using the {@code MIN_NON_RESERVED_WORD} / {@code MAX_NON_RESERVED_WORD} sentinels.
 *
 * <p>
 * <b>Reserved keywords</b> are determined by scanning the Grammar file for all simple string token
 * definitions ({@code <K_NAME: "VALUE">}) and subtracting the non-reserved set.
 */
public class ParserKeywordsUtils {
    /** Minimum inner width (in characters) of the keyword column of the generated RST table. */
    private static final int MIN_COLUMN_WIDTH = 25;

    /** Header cell text of the generated RST table. */
    private static final String TABLE_HEADER = "**Keyword**";

    /** Matches a pure keyword image: word characters, at least two characters, pure US-ASCII. */
    private static final Pattern KEYWORD_PATTERN = Pattern.compile("[A-Za-z_][A-Za-z_0-9]+");

    /**
     * Matches simple token definitions in the grammar: {@code <K_NAME: "VALUE">}. Group 1 captures
     * the string value. Only matches definitions where the value is a plain quoted string -
     * compound regex tokens like {@code <K_DATETIMELITERAL: ...>} won't match.
     */
    private static final Pattern SIMPLE_TOKEN_PATTERN =
            Pattern.compile("<K_\\w+:\\s*\"(\\w+)\"\\s*>", Pattern.MULTILINE);

    private ParserKeywordsUtils() {
        // utility class
    }

    /**
     * Returns the set of <b>non-reserved</b> keywords, i.e. tokens whose kind lies strictly
     * between {@code MIN_NON_RESERVED_WORD} and {@code MAX_NON_RESERVED_WORD}. These keywords can
     * be used as unquoted identifiers.
     *
     * @return a new, sorted, mutable set of non-reserved keyword strings
     */
    public static TreeSet<String> getNonReservedKeywords() {
        TreeSet<String> keywords = new TreeSet<>();
        String[] images = CCJSqlParserConstants.tokenImage;
        // Both sentinels are exclusive: they only mark the range and are not keywords themselves.
        for (int kind = CCJSqlParserConstants.MIN_NON_RESERVED_WORD
                + 1; kind < CCJSqlParserConstants.MAX_NON_RESERVED_WORD; kind++) {
            String image = extractKeyword(images[kind]);
            if (image != null && isKeywordImage(image)) {
                keywords.add(image);
            }
        }
        return keywords;
    }

    /**
     * Returns the set of <b>reserved</b> keywords by scanning the Grammar file for all simple
     * string token definitions and subtracting the non-reserved keywords.
     *
     * @param grammarFile the {@code .jjt} grammar file
     * @return reserved keyword strings
     * @throws IOException if reading the grammar file fails
     */
    public static TreeSet<String> getReservedKeywords(File grammarFile) throws IOException {
        TreeSet<String> allSimple = getAllSimpleKeywords(grammarFile);
        allSimple.removeAll(getNonReservedKeywords());
        return allSimple;
    }

    /**
     * Returns all simple string keywords defined in the grammar file. Scans for
     * {@code <K_NAME: "VALUE">} patterns and collects the string values that look like keywords.
     *
     * @param grammarFile the {@code .jjt} grammar file
     * @return all simple keyword strings
     * @throws IOException if reading the grammar file fails
     */
    public static TreeSet<String> getAllSimpleKeywords(File grammarFile) throws IOException {
        TreeSet<String> keywords = new TreeSet<>();
        String content = Files.readString(grammarFile.toPath(), StandardCharsets.UTF_8);
        Matcher matcher = SIMPLE_TOKEN_PATTERN.matcher(content);
        while (matcher.find()) {
            String value = matcher.group(1);
            // isKeywordImage already includes the US-ASCII check.
            if (isKeywordImage(value)) {
                keywords.add(value);
            }
        }
        return keywords;
    }

    /**
     * Checks whether the given token kind is a non-reserved keyword that can be used as an
     * unquoted identifier.
     *
     * @param tokenKind the token kind to test
     * @return {@code true} if the kind lies strictly between the two sentinel kinds
     */
    public static boolean isNonReservedKeyword(int tokenKind) {
        return tokenKind > CCJSqlParserConstants.MIN_NON_RESERVED_WORD
                && tokenKind < CCJSqlParserConstants.MAX_NON_RESERVED_WORD;
    }

    /**
     * Writes a reStructuredText documentation file listing all reserved keywords as a one-column
     * grid table. The column is at least {@value #MIN_COLUMN_WIDTH} characters wide and grows to
     * fit the longest keyword, so every row has the same width as the table borders. The file is
     * written as UTF-8.
     *
     * @param grammarFile the {@code .jjt} grammar file
     * @param rstFile the output {@code .rst} file to write
     * @throws IOException if reading or writing fails
     */
    public static void writeKeywordsDocumentationFile(File grammarFile, File rstFile)
            throws IOException {
        TreeSet<String> reserved = getReservedKeywords(grammarFile);

        // A grid table requires all rows and borders to have identical width.
        int columnWidth = Math.max(MIN_COLUMN_WIDTH, TABLE_HEADER.length());
        for (String keyword : reserved) {
            columnWidth = Math.max(columnWidth, keyword.length());
        }
        // Border spans the cell content plus one blank on each side.
        String border = "+" + rightPadding("", '-', columnWidth + 2) + "+\n";

        StringBuilder builder = new StringBuilder();
        builder.append("***********************\n");
        builder.append("Reserved Keywords\n");
        builder.append("***********************\n");
        builder.append("\n");
        builder.append(
                "The following Keywords are **reserved** in JSQLParser-|JSQLPARSER_VERSION| and must not be used for **Naming Objects**: \n");
        builder.append("\n");
        builder.append(border);
        builder.append("| ").append(rightPadding(TABLE_HEADER, ' ', columnWidth)).append(" |\n");
        builder.append(border);
        for (String keyword : reserved) {
            builder.append("| ").append(rightPadding(keyword, ' ', columnWidth)).append(" |\n");
            builder.append(border);
        }

        // Explicit charset: the output must not depend on the platform default encoding.
        try (FileWriter fileWriter = new FileWriter(rstFile, StandardCharsets.UTF_8)) {
            fileWriter.append(builder);
        }
    }

    /**
     * Pads {@code input} on the right with {@code ch} until it is {@code length} characters long.
     * Characters of {@code input} itself (including blanks) are never altered. Input that is
     * already {@code length} characters or longer is returned unchanged; a {@code null} input is
     * treated as the string {@code "null"}.
     *
     * @param input the text to pad
     * @param ch the padding character
     * @param length the desired minimum length of the result
     * @return the padded text
     */
    public static String rightPadding(String input, char ch, int length) {
        String text = String.valueOf(input);
        if (text.length() >= length) {
            return text;
        }
        StringBuilder padded = new StringBuilder(length).append(text);
        while (padded.length() < length) {
            padded.append(ch);
        }
        return padded.toString();
    }

    /**
     * Entry point for the {@code updateKeywords} Gradle/Maven task.
     *
     * <p>
     * Usage: {@code java net.sf.jsqlparser.parser.ParserKeywordsUtils <grammar.jjt> <keywords.rst>}
     *
     * @param args {@code args[0]}: path to the grammar file, {@code args[1]}: path to the output
     *        RST file
     * @throws Exception if reading or writing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: ParserKeywordsUtils <grammar.jjt> <keywords.rst>");
        }
        File grammarFile = new File(args[0]);
        if (!grammarFile.canRead()) {
            throw new IOException("Cannot read grammar file: " + grammarFile);
        }
        File rstFile = new File(args[1]);
        // Resolve against the working directory first: a bare file name has no parent otherwise.
        File outputDir = rstFile.getAbsoluteFile().getParentFile();
        if (outputDir != null && !outputDir.mkdirs() && !outputDir.isDirectory()) {
            throw new IOException("Cannot create output directory: " + outputDir);
        }
        writeKeywordsDocumentationFile(grammarFile, rstFile);
        System.out.println("Reserved keywords: " + getReservedKeywords(grammarFile).size());
        System.out.println("Non-reserved keywords: " + getNonReservedKeywords().size());
        System.out.println("Written to: " + rstFile.getAbsolutePath());
    }

    /**
     * Extracts the keyword string from a {@code tokenImage} entry.
     *
     * <p>
     * Two formats are recognised: {@code <K_ACTION>} (the {@code K_} prefix and angle brackets are
     * stripped) and {@code "ACTION"} (the surrounding double quotes are stripped). The minimum
     * length is checked per format, so short quoted images such as {@code "AS"} are extracted as
     * well; whether the result is an acceptable keyword is decided by the caller.
     *
     * @param tokenImage the raw {@code tokenImage} entry, may be {@code null}
     * @return the keyword string, or {@code null} if the entry matches neither format
     */
    private static String extractKeyword(String tokenImage) {
        if (tokenImage == null) {
            return null;
        }
        int length = tokenImage.length();
        // Format: <K_ACTION> -> ACTION (needs at least one character after the prefix)
        if (length > 4 && tokenImage.startsWith("<K_") && tokenImage.endsWith(">")) {
            return tokenImage.substring(3, length - 1);
        }
        // Format: "ACTION" (TOKEN block with quoted string); needs two distinct quote characters
        if (length > 2 && tokenImage.charAt(0) == '"' && tokenImage.charAt(length - 1) == '"') {
            return tokenImage.substring(1, length - 1);
        }
        return null;
    }

    /**
     * Returns {@code true} if the image looks like a SQL keyword: alphabetic or underscore start,
     * word characters only, at least 2 characters, pure US-ASCII.
     *
     * @param image the candidate keyword, may be {@code null}
     * @return whether the image qualifies as a keyword
     */
    private static boolean isKeywordImage(String image) {
        if (image == null || !KEYWORD_PATTERN.matcher(image).matches()) {
            return false;
        }
        // Encoders are stateful and not safe for concurrent use, so create one per call
        // instead of sharing a static instance between the public static methods.
        CharsetEncoder asciiEncoder = StandardCharsets.US_ASCII.newEncoder();
        return asciiEncoder.canEncode(image);
    }
}