ParserKeywordsUtils.java

/*-
 * #%L
 * JSQLParser library
 * %%
 * Copyright (C) 2004 - 2021 JSQLParser
 * %%
 * Dual licensed under GNU LGPL 2.1 or Apache License 2.0
 * #L%
 */
package net.sf.jsqlparser.parser;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Keeps the table of reserved keywords and offers the tooling built around it: extracting the
 * keyword tokens from a grammar file, regenerating the {@code RelObjectNameWithoutValue()}
 * production of that grammar and writing the keyword documentation table.
 */
public class ParserKeywordsUtils {
    public static final CharsetEncoder CHARSET_ENCODER = StandardCharsets.US_ASCII.newEncoder();

    public static final int RESTRICTED_FUNCTION = 1;
    public static final int RESTRICTED_SCHEMA = 2;
    public static final int RESTRICTED_TABLE = 4;
    public static final int RESTRICTED_COLUMN = 8;
    public static final int RESTRICTED_EXPRESSION = 16;
    public static final int RESTRICTED_ALIAS = 32;
    public static final int RESTRICTED_SQL2016 = 64;

    // own bit (128) combined with every other restriction bit
    public static final int RESTRICTED_JSQLPARSER = 128
            | RESTRICTED_FUNCTION
            | RESTRICTED_SCHEMA
            | RESTRICTED_TABLE
            | RESTRICTED_COLUMN
            | RESTRICTED_EXPRESSION
            | RESTRICTED_ALIAS
            | RESTRICTED_SQL2016;


    // Each entry is {keyword (String), restriction bit mask (Integer)}.
    // Classification follows http://www.h2database.com/html/advanced.html#keywords
    // NOTE(review): "PRIOR", "SEL" and "SELECT" are listed twice; both occurrences are reported
    // by getReservedKeywords() and printed by writeKeywordsDocumentationFile().
    public static final Object[][] ALL_RESERVED_KEYWORDS = {
            {"ABSENT", RESTRICTED_JSQLPARSER},
            {"ALL", RESTRICTED_SQL2016},
            {"AND", RESTRICTED_SQL2016},
            {"ANY", RESTRICTED_JSQLPARSER},
            {"AS", RESTRICTED_SQL2016},
            {"BETWEEN", RESTRICTED_SQL2016},
            {"BOTH", RESTRICTED_SQL2016},
            {"CASEWHEN", RESTRICTED_ALIAS},
            {"CHECK", RESTRICTED_SQL2016},
            {"CONNECT", RESTRICTED_ALIAS},
            {"CONNECT_BY_ROOT", RESTRICTED_JSQLPARSER},
            {"CSV", RESTRICTED_JSQLPARSER},
            {"PRIOR", RESTRICTED_JSQLPARSER},
            {"CONSTRAINT", RESTRICTED_SQL2016},
            {"CREATE", RESTRICTED_ALIAS},
            {"CROSS", RESTRICTED_SQL2016},
            {"CURRENT", RESTRICTED_JSQLPARSER},
            {"DEFAULT", RESTRICTED_ALIAS},
            {"DISTINCT", RESTRICTED_SQL2016},
            {"DISTINCTROW", RESTRICTED_SQL2016},
            {"DOUBLE", RESTRICTED_ALIAS},
            {"ELSE", RESTRICTED_JSQLPARSER},
            {"ERRORS", RESTRICTED_JSQLPARSER},
            {"EXCEPT", RESTRICTED_SQL2016},
            {"EXCLUDES", RESTRICTED_JSQLPARSER},
            {"EXISTS", RESTRICTED_SQL2016},
            {"EXTEND", RESTRICTED_JSQLPARSER},
            {"FALSE", RESTRICTED_SQL2016},
            {"FBV", RESTRICTED_JSQLPARSER},
            {"FETCH", RESTRICTED_SQL2016},
            {"FILE", RESTRICTED_JSQLPARSER},
            {"FINAL", RESTRICTED_JSQLPARSER},
            {"FOR", RESTRICTED_SQL2016},
            {"FORCE", RESTRICTED_SQL2016},
            {"FOREIGN", RESTRICTED_SQL2016},
            {"FROM", RESTRICTED_SQL2016},
            {"FULL", RESTRICTED_SQL2016},
            {"GLOBAL", RESTRICTED_ALIAS},
            {"GROUP", RESTRICTED_SQL2016},
            {"GROUPING", RESTRICTED_ALIAS},
            {"QUALIFY", RESTRICTED_ALIAS},
            {"HAVING", RESTRICTED_SQL2016},
            {"IF", RESTRICTED_SQL2016},
            {"IIF", RESTRICTED_ALIAS},
            {"IGNORE", RESTRICTED_ALIAS},
            {"ILIKE", RESTRICTED_SQL2016},
            {"IMPORT", RESTRICTED_JSQLPARSER},
            {"IN", RESTRICTED_SQL2016},
            {"INCLUDES", RESTRICTED_JSQLPARSER},
            {"INNER", RESTRICTED_SQL2016},
            {"INTERSECT", RESTRICTED_SQL2016},
            {"INTERVAL", RESTRICTED_SQL2016},
            {"INTO", RESTRICTED_JSQLPARSER},
            {"IS", RESTRICTED_SQL2016},
            {"JOIN", RESTRICTED_JSQLPARSER},
            {"LATERAL", RESTRICTED_SQL2016},
            {"LEFT", RESTRICTED_SQL2016},
            {"LIKE", RESTRICTED_SQL2016},
            {"LIMIT", RESTRICTED_SQL2016},
            {"MINUS", RESTRICTED_SQL2016},
            {"NATURAL", RESTRICTED_SQL2016},
            {"NOCYCLE", RESTRICTED_JSQLPARSER},
            {"NOT", RESTRICTED_SQL2016},
            {"NULL", RESTRICTED_SQL2016},
            {"OFFSET", RESTRICTED_SQL2016},
            {"ON", RESTRICTED_SQL2016},
            {"ONLY", RESTRICTED_JSQLPARSER},
            {"OPTIMIZE", RESTRICTED_ALIAS},
            {"OR", RESTRICTED_SQL2016},
            {"ORDER", RESTRICTED_SQL2016},
            {"OUTER", RESTRICTED_JSQLPARSER},
            {"OUTPUT", RESTRICTED_JSQLPARSER},
            // NOTE(review): the next two keywords carry a trailing blank, so they never equal a
            // keyword extracted from the grammar (those consist of word characters only) and are
            // not removed in buildGrammarForRelObjectNameWithoutValue(). "OPTIMIZE" is still
            // covered by its entry above, "OVERWRITE" is not -- confirm whether that is intended.
            {"OPTIMIZE ", RESTRICTED_JSQLPARSER},
            {"OVERWRITE ", RESTRICTED_JSQLPARSER},
            {"PIVOT", RESTRICTED_JSQLPARSER},
            {"PREFERRING", RESTRICTED_JSQLPARSER},
            {"PRIOR", RESTRICTED_ALIAS},
            {"PROCEDURE", RESTRICTED_ALIAS},
            {"PUBLIC", RESTRICTED_ALIAS},
            {"RETURNING", RESTRICTED_JSQLPARSER},
            {"RIGHT", RESTRICTED_SQL2016},
            {"SAMPLE", RESTRICTED_ALIAS},
            {"SCRIPT", RESTRICTED_JSQLPARSER},
            {"SEL", RESTRICTED_ALIAS},
            {"SELECT", RESTRICTED_ALIAS},
            {"SEMI", RESTRICTED_JSQLPARSER},
            {"SET", RESTRICTED_JSQLPARSER},
            {"SOME", RESTRICTED_JSQLPARSER},
            {"START", RESTRICTED_JSQLPARSER},
            {"STATEMENT", RESTRICTED_JSQLPARSER},
            {"TABLES", RESTRICTED_ALIAS},
            {"TOP", RESTRICTED_SQL2016},
            {"TRAILING", RESTRICTED_SQL2016},
            {"TRUE", RESTRICTED_SQL2016},
            {"UNBOUNDED", RESTRICTED_JSQLPARSER},
            {"UNION", RESTRICTED_SQL2016},
            {"UNIQUE", RESTRICTED_SQL2016},
            {"UNKNOWN", RESTRICTED_SQL2016},
            {"UNPIVOT", RESTRICTED_JSQLPARSER},
            {"USE", RESTRICTED_JSQLPARSER},
            {"USING", RESTRICTED_SQL2016},
            {"SQL_CACHE", RESTRICTED_JSQLPARSER},
            {"SQL_CALC_FOUND_ROWS", RESTRICTED_JSQLPARSER},
            {"SQL_NO_CACHE", RESTRICTED_JSQLPARSER},
            {"STRAIGHT_JOIN", RESTRICTED_JSQLPARSER},
            {"TABLESAMPLE", RESTRICTED_ALIAS},
            {"VALUE", RESTRICTED_JSQLPARSER},
            {"VALUES", RESTRICTED_SQL2016},
            {"VARYING", RESTRICTED_JSQLPARSER},
            {"VERIFY", RESTRICTED_JSQLPARSER},
            {"WHEN", RESTRICTED_SQL2016},
            {"WHERE", RESTRICTED_SQL2016},
            {"WINDOW", RESTRICTED_SQL2016},
            {"WITH", RESTRICTED_SQL2016},
            {"XOR", RESTRICTED_JSQLPARSER},
            {"XMLSERIALIZE", RESTRICTED_JSQLPARSER},

            // add keywords from the composite token definitions:
            // tk=<K_DATE_LITERAL> | tk=<K_DATETIMELITERAL> | tk=<K_STRING_FUNCTION_NAME>
            // we will use the composite tokens instead, which are always hit first before the
            // simple keywords
            // @todo: figure out a way to remove these composite tokens, as they do more harm than
            // good
            {"SEL", RESTRICTED_JSQLPARSER},
            {"SELECT", RESTRICTED_JSQLPARSER},
            {"DATE", RESTRICTED_JSQLPARSER},
            {"TIME", RESTRICTED_JSQLPARSER},
            {"TIMESTAMP", RESTRICTED_JSQLPARSER},
            {"YEAR", RESTRICTED_JSQLPARSER},
            {"MONTH", RESTRICTED_JSQLPARSER},
            {"DAY", RESTRICTED_JSQLPARSER},
            {"HOUR", RESTRICTED_JSQLPARSER},
            {"MINUTE", RESTRICTED_JSQLPARSER},
            {"SECOND", RESTRICTED_JSQLPARSER},
            {"SUBSTR", RESTRICTED_JSQLPARSER},
            {"SUBSTRING", RESTRICTED_JSQLPARSER},
            {"TRIM", RESTRICTED_JSQLPARSER},
            {"POSITION", RESTRICTED_JSQLPARSER},
            {"OVERLAY", RESTRICTED_JSQLPARSER},
            {"NEXTVAL", RESTRICTED_COLUMN},

            // @todo: Object Names should not start with Hex-Prefix, we shall not find that Token
            {"0x", RESTRICTED_JSQLPARSER}
    };

    // A "TOKEN : /* comment */" header line followed by a brace block (up to three nested levels)
    private static final Pattern TOKEN_BLOCK_PATTERN = Pattern.compile(
            "TOKEN\\s*:\\s*/\\*.*\\*/*(?:\\r?\\n|\\r)\\{(?:[^}{]+|\\{(?:[^}{]+|\\{[^}{]*})*})*}",
            Pattern.MULTILINE);

    // A quoted literal made of at least two word characters
    private static final Pattern TOKEN_STRING_VALUE_PATTERN =
            Pattern.compile("\"(\\w{2,})\"", Pattern.MULTILINE);

    // Multi-line and single-line comments
    private static final Pattern COMMENT_PATTERN =
            Pattern.compile("(?sm)((\\/\\*.*?\\*\\/)|(\\/\\/.*?$))");

    // A token definition whose name is not marked private with a leading '#'
    private static final Pattern PUBLIC_TOKEN_DEFINITION_PATTERN =
            Pattern.compile("(?sm)^<\\s*[^#].*");

    // The whole "String RelObjectNameWithoutValue() : {...} {...}" production
    private static final Pattern REL_OBJECT_NAME_WITHOUT_VALUE_PATTERN = Pattern.compile(
            "String\\W*RelObjectNameWithoutValue\\W*\\(\\W*\\)\\W*:\\s*\\{(?:[^}{]+|\\{(?:[^}{]+|\\{[^}{]*})*})*}\\s*\\{(?:[^}{]+|\\{(?:[^}{]+|\\{[^}{]*})*})*}",
            Pattern.MULTILINE);

    /**
     * Tells whether a keyword's bit mask is selected by a restriction.
     *
     * A keyword is selected when its mask contains every bit of the restriction or when its mask
     * is completely contained in the restriction. As a consequence, asking for
     * {@link #RESTRICTED_JSQLPARSER} (all bits set) selects every keyword of the table.
     *
     * @param value the bit mask stored with the keyword
     * @param restriction the bit mask asked for
     * @return {@code true} when the keyword is selected
     */
    private static boolean matchesRestriction(int value, int restriction) {
        return (value & restriction) == restriction || (restriction & value) == value;
    }

    /**
     * Collects the keywords of {@link #ALL_RESERVED_KEYWORDS} selected by the given restriction.
     *
     * @param restriction one of the {@code RESTRICTED_*} bit masks (or a combination)
     * @return the selected keywords in table order; keywords listed twice in the table appear
     *         twice
     */
    public static List<String> getReservedKeywords(int restriction) {
        List<String> keywords = new ArrayList<>();
        for (Object[] data : ALL_RESERVED_KEYWORDS) {
            if (matchesRestriction((int) data[1], restriction)) {
                keywords.add((String) data[0]);
            }
        }

        return keywords;
    }

    /**
     * Regenerates the keyword dependent grammar parts and the keyword documentation.
     *
     * @param args with: Grammar File, Keyword Documentation File
     * @throws IllegalArgumentException when fewer than two arguments are given
     * @throws FileNotFoundException when the grammar file is not an existing readable and
     *         writable file or the documentation file is not writable
     * @throws Exception when reading or writing one of the files fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException(
                    "Expected two arguments: <grammar file> <keyword documentation file>");
        }

        File grammarFile = new File(args[0]);
        if (grammarFile.exists() && grammarFile.canRead() && grammarFile.canWrite()) {
            buildGrammarForRelObjectName(grammarFile);
            buildGrammarForRelObjectNameWithoutValue(grammarFile);
        } else {
            throw new FileNotFoundException("Can't read or write file " + args[0]);
        }

        File keywordDocumentationFile = new File(args[1]);
        // returns false when the file exists already, which is fine
        keywordDocumentationFile.createNewFile();
        if (keywordDocumentationFile.canWrite()) {
            writeKeywordsDocumentationFile(keywordDocumentationFile);
        } else {
            throw new FileNotFoundException("Can't write file " + args[1]);
        }
    }

    /**
     * Extracts the keywords defined in the {@code TOKEN} blocks of a grammar file.
     *
     * Only blocks whose {@code TOKEN :} header is followed by a comment on the same line are
     * considered. Comments inside a block are dropped, token definitions marked private with
     * {@code #} are skipped, and every quoted literal consisting of two or more word characters
     * is reported.
     *
     * @param file the grammar file, read with the platform default charset
     * @return the sorted set of keywords
     * @throws IOException when the file can't be read
     */
    public static TreeSet<String> getAllKeywordsUsingRegex(File file) throws IOException {
        TreeSet<String> allKeywords = new TreeSet<>();

        Path path = file.toPath();
        Charset charset = Charset.defaultCharset();
        String content = new String(Files.readAllBytes(path), charset);

        Matcher tokenBlockMatcher = TOKEN_BLOCK_PATTERN.matcher(content);
        while (tokenBlockMatcher.find()) {
            // remove single and multiline comments
            String tokenBlock = COMMENT_PATTERN.matcher(tokenBlockMatcher.group(0)).replaceAll("");

            for (String tokenDefinition : getTokenDefinitions(tokenBlock)) {
                // private token definitions do not contribute keywords
                if (!PUBLIC_TOKEN_DEFINITION_PATTERN.matcher(tokenDefinition).matches()) {
                    continue;
                }

                Matcher tokenStringValueMatcher =
                        TOKEN_STRING_VALUE_PATTERN.matcher(tokenDefinition);
                while (tokenStringValueMatcher.find()) {
                    String tokenValue = tokenStringValueMatcher.group(1);
                    // keep pure US-ASCII values only (the capture group is word characters
                    // already, so no further syntax check is needed)
                    if (CHARSET_ENCODER.canEncode(tokenValue)) {
                        allKeywords.add(tokenValue);
                    }
                }
            }
        }
        return allKeywords;
    }

    /**
     * Splits a token block into its top level {@code <...>} definitions.
     *
     * Angle brackets inside quoted literals are ignored. An unterminated literal ends the scan
     * and a closing bracket without a matching opening bracket is skipped, so malformed input
     * yields the definitions found so far instead of failing.
     *
     * @param tokenBlock the text of a token block with comments already removed
     * @return the definitions including their enclosing angle brackets, in order of appearance
     */
    private static List<String> getTokenDefinitions(String tokenBlock) {
        List<String> tokenDefinitions = new ArrayList<>();
        char openChar = '<';
        char closeChar = '>';
        char[] tokenBlockChars = tokenBlock.toCharArray();
        int level = 0;
        int tokenDefinitionStart = -1;

        for (int i = 0; i < tokenBlockChars.length; ++i) {
            if (isQuotationMark(i, tokenBlockChars)) {
                // skip everything inside quotation marks, stopping on the closing mark
                do {
                    ++i;
                } while (i < tokenBlockChars.length && !isQuotationMark(i, tokenBlockChars));

                if (i >= tokenBlockChars.length) {
                    // the literal is never closed: nothing sensible is left to scan
                    break;
                }
            }

            char character = tokenBlockChars[i];
            if (character == openChar) {
                if (level == 0) {
                    tokenDefinitionStart = i;
                }

                ++level;
            } else if (character == closeChar && level > 0) {
                // a closing bracket at level 0 has no partner and must not unbalance the count
                --level;

                if (level == 0 && tokenDefinitionStart >= 0) {
                    tokenDefinitions.add(tokenBlock.substring(tokenDefinitionStart, i + 1));
                    tokenDefinitionStart = -1;
                }
            }
        }

        return tokenDefinitions;
    }

    /**
     * Tells whether the character at {@code index} is a quotation mark delimiting a literal.
     *
     * A mark preceded by a single backslash counts as escaped; when that backslash is itself
     * preceded by a backslash the mark delimits again.
     *
     * @param index a valid position within {@code str}
     * @param str the characters under inspection
     * @return {@code true} for an unescaped quotation mark
     */
    private static boolean isQuotationMark(int index, char[] str) {
        if (str[index] != '\"') {
            return false;
        }

        // check if quotation is escaped
        if (index > 0 && str[index - 1] == '\\') {
            return index > 1 && str[index - 2] == '\\';
        }

        return true;
    }

    /**
     * Rewrites the {@code RelObjectNameWithoutValue()} production of the grammar file so that it
     * accepts every keyword token of the grammar which is not listed as reserved.
     *
     * @param file the grammar file, modified in place
     * @throws Exception when the file can't be read or written
     */
    public static void buildGrammarForRelObjectNameWithoutValue(File file) throws Exception {
        TreeSet<String> allKeywords = getAllKeywords(file);
        allKeywords.removeAll(getReservedKeywords(RESTRICTED_JSQLPARSER));

        StringBuilder builder = new StringBuilder();
        builder.append("String RelObjectNameWithoutValue() :\n"
                + "{    Token tk = null; }\n"
                + "{\n"
                // @todo: find a way to avoid those hardcoded compound tokens
                + "    ( tk=<DATA_TYPE> | tk=<S_IDENTIFIER> | tk=<S_QUOTED_IDENTIFIER> |  tk=<K_DATE_LITERAL> | tk=<K_DATETIMELITERAL> | tk=<K_STRING_FUNCTION_NAME> | tk=<K_ISOLATION> | tk=<K_TIME_KEY_EXPR> | tk=<K_TEXT_LITERAL> \n"
                + "      ");

        for (String keyword : allKeywords) {
            builder.append(" | tk=\"").append(keyword).append("\"");
        }

        builder.append(" )\n" + "    { return tk.image; }\n" + "}");

        replaceInFile(file, REL_OBJECT_NAME_WITHOUT_VALUE_PATTERN, builder.toString());
    }

    /**
     * Assembles the text of a {@code RelObjectName()} production from the reserved keyword table.
     *
     * The result is not written anywhere yet (see the todo below), so calling this method
     * currently leaves the grammar file untouched.
     *
     * @param file the grammar file; not accessed at the moment
     * @throws Exception declared for the planned file replacement
     */
    public static void buildGrammarForRelObjectName(File file) throws Exception {
        // Pattern pattern =
        // Pattern.compile("String\\W*RelObjectName\\W*\\(\\W*\\)\\W*:\\s*\\{(?:[^}{]+|\\{(?:[^}{]+|\\{[^}{]*})*})*}\\s*\\{(?:[^}{]+|\\{(?:[^}{]+|\\{[^}{]*})*})*}",
        // Pattern.MULTILINE);
        TreeSet<String> allKeywords = new TreeSet<>(getReservedKeywords(RESTRICTED_ALIAS));
        allKeywords.removeAll(getReservedKeywords(RESTRICTED_JSQLPARSER & ~RESTRICTED_ALIAS));

        StringBuilder builder = new StringBuilder();
        builder.append("String RelObjectName() :\n"
                + "{    Token tk = null; String result = null; }\n"
                + "{\n"
                + "    (result = RelObjectNameWithoutValue()\n"
                + "      ");

        for (String keyword : allKeywords) {
            builder.append(" | tk=\"").append(keyword).append("\"");
        }

        builder.append(" )\n" + "    { return tk!=null ? tk.image : result; }\n" + "}");

        // @todo: Needs fine-tuning, we are not replacing this part yet
        // replaceInFile(file, pattern, builder.toString());
    }

    /**
     * Extracts the keywords of a grammar file, see {@link #getAllKeywordsUsingRegex(File)}.
     *
     * @param file the grammar file
     * @return the sorted set of keywords
     * @throws Exception when the file can't be read
     */
    public static TreeSet<String> getAllKeywords(File file) throws Exception {
        return getAllKeywordsUsingRegex(file);
    }

    /**
     * Replaces every match of the pattern within a file by the given text, taken literally.
     *
     * @param file the file to modify in place, read and written with the platform default charset
     * @param pattern the pattern locating the text to replace
     * @param replacement the literal replacement text
     * @throws IOException when the file can't be read or written
     */
    private static void replaceInFile(File file, Pattern pattern, String replacement)
            throws IOException {
        Path path = file.toPath();
        Charset charset = Charset.defaultCharset();

        String content = new String(Files.readAllBytes(path), charset);
        // quote the replacement, otherwise '$' and '\' would be read as group references
        content = pattern.matcher(content).replaceAll(Matcher.quoteReplacement(replacement));
        Files.write(path, content.getBytes(charset));
    }

    /**
     * Pads a text at its end with a fill character up to the given length.
     *
     * Only the padding consists of the fill character, blanks inside the text are kept.
     *
     * @param input the text to pad; {@code null} is treated as the text {@code "null"}
     * @param ch the fill character
     * @param length the minimum length of the result
     * @return the padded text, or the text itself when it has {@code length} or more characters
     */
    public static String rightPadding(String input, char ch, int length) {
        String text = String.valueOf(input);
        if (text.length() >= length) {
            return text;
        }

        StringBuilder padded = new StringBuilder(length).append(text);
        while (padded.length() < length) {
            padded.append(ch);
        }
        return padded.toString();
    }

    /**
     * Writes the documentation of the restricted keywords as a reStructuredText grid table.
     *
     * There is one row per entry of {@link #ALL_RESERVED_KEYWORDS}, in table order, with a
     * {@code Yes} in the columns whose restriction selects the keyword.
     *
     * @param file the file to (over)write
     * @throws IOException when the file can't be written
     */
    public static void writeKeywordsDocumentationFile(File file) throws IOException {
        String rowSeparator = "+----------------------+-------------+-----------+\n";

        StringBuilder builder = new StringBuilder();
        builder.append("***********************\n");
        builder.append("Restricted Keywords\n");
        builder.append("***********************\n");
        builder.append("\n");

        builder.append(
                "The following Keywords are **restricted** in JSQLParser-|JSQLPARSER_VERSION| and must not be used for **Naming Objects**: \n");
        builder.append("\n");

        builder.append(rowSeparator);
        builder.append("| **Keyword**          | JSQL Parser | SQL:2016  |\n");
        builder.append(rowSeparator);

        for (Object[] keywordDefinition : ALL_RESERVED_KEYWORDS) {
            int value = (int) keywordDefinition[1];
            String jsqlParserFlag = matchesRestriction(value, RESTRICTED_JSQLPARSER) ? "Yes" : "";
            String sql2016Flag = matchesRestriction(value, RESTRICTED_SQL2016) ? "Yes" : "";

            builder.append("| ").append(rightPadding(keywordDefinition[0].toString(), ' ', 20))
                    .append(" | ");
            builder.append(rightPadding(jsqlParserFlag, ' ', 11)).append(" | ");
            builder.append(rightPadding(sql2016Flag, ' ', 9)).append(" | ");

            builder.append("\n");
            builder.append(rowSeparator);
        }

        // closing the writer flushes it
        try (FileWriter fileWriter = new FileWriter(file)) {
            fileWriter.append(builder);
        }
    }
}