/src/serenity/Userland/Libraries/LibSQL/AST/Parser.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2021, Tim Flynn <trflynn89@serenityos.org> |
3 | | * Copyright (c) 2021, Mahmoud Mandour <ma.mandourr@gmail.com> |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #pragma once |
9 | | |
10 | | #include <AK/ByteString.h> |
11 | | #include <AK/StringView.h> |
12 | | #include <LibSQL/AST/AST.h> |
13 | | #include <LibSQL/AST/Lexer.h> |
14 | | #include <LibSQL/AST/Token.h> |
15 | | |
16 | | namespace SQL::AST { |
17 | | |
18 | | namespace Limits { |
19 | | // https://www.sqlite.org/limits.html |
20 | | constexpr size_t maximum_expression_tree_depth = 1000; |
21 | | constexpr size_t maximum_subquery_depth = 100; |
22 | | constexpr size_t maximum_bound_parameters = 1000; |
23 | | } |
24 | | |
25 | | class Parser { |
26 | | struct Error { |
27 | | ByteString message; |
28 | | SourcePosition position; |
29 | | |
30 | | ByteString to_byte_string() const |
31 | 0 | { |
32 | 0 | return ByteString::formatted("{} (line: {}, column: {})", message, position.line, position.column); |
33 | 0 | } |
34 | | }; |
35 | | |
36 | | public: |
37 | | explicit Parser(Lexer lexer); |
38 | | |
39 | | NonnullRefPtr<Statement> next_statement(); |
40 | | |
41 | 59.7M | bool has_errors() const { return m_parser_state.m_errors.size(); } |
42 | 0 | Vector<Error> const& errors() const { return m_parser_state.m_errors; } |
43 | | |
44 | | protected: |
45 | | NonnullRefPtr<Expression> parse_expression(); // Protected for unit testing. |
46 | | |
47 | | private: |
48 | | struct ParserState { |
49 | | explicit ParserState(Lexer); |
50 | | |
51 | | Lexer m_lexer; |
52 | | Token m_token; |
53 | | Vector<Error> m_errors; |
54 | | size_t m_current_expression_depth { 0 }; |
55 | | size_t m_current_subquery_depth { 0 }; |
56 | | size_t m_bound_parameters { 0 }; |
57 | | }; |
58 | | |
59 | | NonnullRefPtr<Statement> parse_statement(); |
60 | | NonnullRefPtr<Statement> parse_statement_with_expression_list(RefPtr<CommonTableExpressionList>); |
61 | | NonnullRefPtr<CreateSchema> parse_create_schema_statement(); |
62 | | NonnullRefPtr<CreateTable> parse_create_table_statement(); |
63 | | NonnullRefPtr<AlterTable> parse_alter_table_statement(); |
64 | | NonnullRefPtr<DropTable> parse_drop_table_statement(); |
65 | | NonnullRefPtr<DescribeTable> parse_describe_table_statement(); |
66 | | NonnullRefPtr<Insert> parse_insert_statement(RefPtr<CommonTableExpressionList>); |
67 | | NonnullRefPtr<Update> parse_update_statement(RefPtr<CommonTableExpressionList>); |
68 | | NonnullRefPtr<Delete> parse_delete_statement(RefPtr<CommonTableExpressionList>); |
69 | | NonnullRefPtr<Select> parse_select_statement(RefPtr<CommonTableExpressionList>); |
70 | | RefPtr<CommonTableExpressionList> parse_common_table_expression_list(); |
71 | | |
72 | | NonnullRefPtr<Expression> parse_primary_expression(); |
73 | | NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression> primary); |
74 | | bool match_secondary_expression() const; |
75 | | RefPtr<Expression> parse_literal_value_expression(); |
76 | | RefPtr<Expression> parse_bind_parameter_expression(); |
77 | | RefPtr<Expression> parse_column_name_expression(Optional<ByteString> with_parsed_identifier = {}, bool with_parsed_period = false); |
78 | | RefPtr<Expression> parse_unary_operator_expression(); |
79 | | RefPtr<Expression> parse_binary_operator_expression(NonnullRefPtr<Expression> lhs); |
80 | | RefPtr<Expression> parse_chained_expression(bool surrounded_by_parentheses = true); |
81 | | RefPtr<Expression> parse_cast_expression(); |
82 | | RefPtr<Expression> parse_case_expression(); |
83 | | RefPtr<Expression> parse_exists_expression(bool invert_expression); |
84 | | RefPtr<Expression> parse_collate_expression(NonnullRefPtr<Expression> expression); |
85 | | RefPtr<Expression> parse_is_expression(NonnullRefPtr<Expression> expression); |
86 | | RefPtr<Expression> parse_match_expression(NonnullRefPtr<Expression> lhs, bool invert_expression); |
87 | | RefPtr<Expression> parse_null_expression(NonnullRefPtr<Expression> expression, bool invert_expression); |
88 | | RefPtr<Expression> parse_between_expression(NonnullRefPtr<Expression> expression, bool invert_expression); |
89 | | RefPtr<Expression> parse_in_expression(NonnullRefPtr<Expression> expression, bool invert_expression); |
90 | | |
91 | | NonnullRefPtr<ColumnDefinition> parse_column_definition(); |
92 | | NonnullRefPtr<TypeName> parse_type_name(); |
93 | | NonnullRefPtr<SignedNumber> parse_signed_number(); |
94 | | NonnullRefPtr<CommonTableExpression> parse_common_table_expression(); |
95 | | NonnullRefPtr<QualifiedTableName> parse_qualified_table_name(); |
96 | | NonnullRefPtr<ReturningClause> parse_returning_clause(); |
97 | | NonnullRefPtr<ResultColumn> parse_result_column(); |
98 | | NonnullRefPtr<TableOrSubquery> parse_table_or_subquery(); |
99 | | NonnullRefPtr<OrderingTerm> parse_ordering_term(); |
100 | | void parse_schema_and_table_name(ByteString& schema_name, ByteString& table_name); |
101 | | ConflictResolution parse_conflict_resolution(); |
102 | | |
103 | | template<typename ParseCallback> |
104 | | void parse_comma_separated_list(bool surrounded_by_parentheses, ParseCallback&& parse_callback) |
105 | 681k | { |
106 | 681k | if (surrounded_by_parentheses) |
107 | 10.6k | consume(TokenType::ParenOpen); |
108 | | |
109 | 59.7M | while (!has_errors() && !match(TokenType::Eof)) { |
110 | 59.4M | parse_callback(); |
111 | | |
112 | 59.4M | if (!match(TokenType::Comma)) |
113 | 390k | break; |
114 | | |
115 | 59.0M | consume(TokenType::Comma); |
116 | 59.0M | }; |
117 | | |
118 | 681k | if (surrounded_by_parentheses) |
119 | 10.6k | consume(TokenType::ParenClose); |
120 | 681k | } Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_create_table_statement()::$_0>(bool, SQL::AST::Parser::parse_create_table_statement()::$_0&&) Line | Count | Source | 105 | 221 | { | 106 | 221 | if (surrounded_by_parentheses) | 107 | 221 | consume(TokenType::ParenOpen); | 108 | | | 109 | 5.43M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 5.43M | parse_callback(); | 111 | | | 112 | 5.43M | if (!match(TokenType::Comma)) | 113 | 119 | break; | 114 | | | 115 | 5.43M | consume(TokenType::Comma); | 116 | 5.43M | }; | 117 | | | 118 | 221 | if (surrounded_by_parentheses) | 119 | 221 | consume(TokenType::ParenClose); | 120 | 221 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_insert_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_0>(bool, SQL::AST::Parser::parse_insert_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_0&&) Line | Count | Source | 105 | 217 | { | 106 | 217 | if (surrounded_by_parentheses) | 107 | 217 | consume(TokenType::ParenOpen); | 108 | | | 109 | 356k | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 356k | parse_callback(); | 111 | | | 112 | 356k | if (!match(TokenType::Comma)) | 113 | 191 | break; | 114 | | | 115 | 356k | consume(TokenType::Comma); | 116 | 356k | }; | 117 | | | 118 | 217 | if (surrounded_by_parentheses) | 119 | 217 | consume(TokenType::ParenClose); | 120 | 217 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_insert_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_1>(bool, SQL::AST::Parser::parse_insert_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_1&&) Line | Count | Source | 105 | 237 | { | 106 | 237 | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 205k | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 204k | parse_callback(); | 111 | | | 112 | 204k | if (!match(TokenType::Comma)) | 113 | 192 | break; | 114 | | | 115 | 204k | consume(TokenType::Comma); | 116 | 204k | }; | 117 | | | 118 | 237 | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 237 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_update_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_0>(bool, SQL::AST::Parser::parse_update_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_0&&) Line | Count | Source | 105 | 660 | { | 106 | 660 | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 1.86M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 1.86M | parse_callback(); | 111 | | | 112 | 1.86M | if (!match(TokenType::Comma)) | 113 | 528 | break; | 114 | | | 115 | 1.86M | consume(TokenType::Comma); | 116 | 1.86M | }; | 117 | | | 118 | 660 | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 660 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_update_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_0::operator()() const::{lambda()#1}>(bool, SQL::AST::Parser::parse_update_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_0::operator()() const::{lambda()#1}&&)Line | Count | Source | 105 | 656 | { | 106 | 656 | if (surrounded_by_parentheses) | 107 | 656 | consume(TokenType::ParenOpen); | 108 | | | 109 | 2.58M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 2.58M | parse_callback(); | 111 | | | 112 | 2.58M | if (!match(TokenType::Comma)) | 113 | 629 | break; | 114 | | | 115 | 2.58M | consume(TokenType::Comma); | 116 | 2.58M | }; | 117 | | | 118 | 656 | if (surrounded_by_parentheses) | 119 | 656 | consume(TokenType::ParenClose); | 120 | 656 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_update_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_1>(bool, SQL::AST::Parser::parse_update_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_1&&) Line | Count | Source | 105 | 84 | { | 106 | 84 | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 1.80M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 1.80M | parse_callback(); | 111 | | | 112 | 1.80M | if (!match(TokenType::Comma)) | 113 | 58 | break; | 114 | | | 115 | 1.80M | consume(TokenType::Comma); | 116 | 1.80M | }; | 117 | | | 118 | 84 | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 84 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_select_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_0>(bool, SQL::AST::Parser::parse_select_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_0&&) Line | Count | Source | 105 | 141k | { | 106 | 141k | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 15.2M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 15.2M | parse_callback(); | 111 | | | 112 | 15.2M | if (!match(TokenType::Comma)) | 113 | 127k | break; | 114 | | | 115 | 15.0M | consume(TokenType::Comma); | 116 | 15.0M | }; | 117 | | | 118 | 141k | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 141k | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_select_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_1>(bool, SQL::AST::Parser::parse_select_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_1&&) Line | Count | Source | 105 | 884 | { | 106 | 884 | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 2.45M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 2.45M | parse_callback(); | 111 | | | 112 | 2.45M | if (!match(TokenType::Comma)) | 113 | 656 | break; | 114 | | | 115 | 2.45M | consume(TokenType::Comma); | 116 | 2.45M | }; | 117 | | | 118 | 884 | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 884 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_select_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_2>(bool, SQL::AST::Parser::parse_select_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_2&&) Line | Count | Source | 105 | 1.29k | { | 106 | 1.29k | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 893k | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 892k | parse_callback(); | 111 | | | 112 | 892k | if (!match(TokenType::Comma)) | 113 | 541 | break; | 114 | | | 115 | 891k | consume(TokenType::Comma); | 116 | 891k | }; | 117 | | | 118 | 1.29k | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 1.29k | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_select_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_3>(bool, SQL::AST::Parser::parse_select_statement(AK::RefPtr<SQL::AST::CommonTableExpressionList>)::$_3&&) Line | Count | Source | 105 | 1.36k | { | 106 | 1.36k | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 6.83M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 6.83M | parse_callback(); | 111 | | | 112 | 6.83M | if (!match(TokenType::Comma)) | 113 | 847 | break; | 114 | | | 115 | 6.83M | consume(TokenType::Comma); | 116 | 6.83M | }; | 117 | | | 118 | 1.36k | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 1.36k | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_common_table_expression_list()::$_0>(bool, SQL::AST::Parser::parse_common_table_expression_list()::$_0&&) Line | Count | Source | 105 | 778 | { | 106 | 778 | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 106k | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 106k | parse_callback(); | 111 | | | 112 | 106k | if (!match(TokenType::Comma)) | 113 | 634 | break; | 114 | | | 115 | 105k | consume(TokenType::Comma); | 116 | 105k | }; | 117 | | | 118 | 778 | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 778 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_chained_expression(bool)::$_0>(bool, SQL::AST::Parser::parse_chained_expression(bool)::$_0&&) Line | Count | Source | 105 | 484k | { | 106 | 484k | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 9.74M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 9.51M | parse_callback(); | 111 | | | 112 | 9.51M | if (!match(TokenType::Comma)) | 113 | 248k | break; | 114 | | | 115 | 9.26M | consume(TokenType::Comma); | 116 | 9.26M | }; | 117 | | | 118 | 484k | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 484k | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_in_expression(AK::NonnullRefPtr<SQL::AST::Expression>, bool)::$_0>(bool, SQL::AST::Parser::parse_in_expression(AK::NonnullRefPtr<SQL::AST::Expression>, bool)::$_0&&) Line | Count | Source | 105 | 38.9k | { | 106 | 38.9k | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 662k | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 624k | parse_callback(); | 111 | | | 112 | 624k | if (!match(TokenType::Comma)) | 113 | 857 | break; | 114 | | | 115 | 623k | consume(TokenType::Comma); | 116 | 623k | }; | 117 | | | 118 | 38.9k | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 38.9k | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_common_table_expression()::$_0>(bool, SQL::AST::Parser::parse_common_table_expression()::$_0&&) Line | Count | Source | 105 | 373 | { | 106 | 373 | if (surrounded_by_parentheses) | 107 | 373 | consume(TokenType::ParenOpen); | 108 | | | 109 | 1.73M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 1.73M | parse_callback(); | 111 | | | 112 | 1.73M | if (!match(TokenType::Comma)) | 113 | 325 | break; | 114 | | | 115 | 1.73M | consume(TokenType::Comma); | 116 | 1.73M | }; | 117 | | | 118 | 373 | if (surrounded_by_parentheses) | 119 | 373 | consume(TokenType::ParenClose); | 120 | 373 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_returning_clause()::$_0>(bool, SQL::AST::Parser::parse_returning_clause()::$_0&&) Line | Count | Source | 105 | 212 | { | 106 | 212 | if (surrounded_by_parentheses) | 107 | 0 | consume(TokenType::ParenOpen); | 108 | | | 109 | 5.11M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 5.11M | parse_callback(); | 111 | | | 112 | 5.11M | if (!match(TokenType::Comma)) | 113 | 164 | break; | 114 | | | 115 | 5.11M | consume(TokenType::Comma); | 116 | 5.11M | }; | 117 | | | 118 | 212 | if (surrounded_by_parentheses) | 119 | 0 | consume(TokenType::ParenClose); | 120 | 212 | } |
Parser.cpp:void SQL::AST::Parser::parse_comma_separated_list<SQL::AST::Parser::parse_table_or_subquery()::$_1>(bool, SQL::AST::Parser::parse_table_or_subquery()::$_1&&) Line | Count | Source | 105 | 9.19k | { | 106 | 9.19k | if (surrounded_by_parentheses) | 107 | 9.19k | consume(TokenType::ParenOpen); | 108 | | | 109 | 4.75M | while (!has_errors() && !match(TokenType::Eof)) { | 110 | 4.75M | parse_callback(); | 111 | | | 112 | 4.75M | if (!match(TokenType::Comma)) | 113 | 8.35k | break; | 114 | | | 115 | 4.74M | consume(TokenType::Comma); | 116 | 4.74M | }; | 117 | | | 118 | 9.19k | if (surrounded_by_parentheses) | 119 | 9.19k | consume(TokenType::ParenClose); | 120 | 9.19k | } |
|
121 | | |
122 | | Token consume(); |
123 | | Token consume(TokenType type); |
124 | | bool consume_if(TokenType type); |
125 | | bool match(TokenType type) const; |
126 | | |
127 | | void expected(StringView what); |
128 | | void syntax_error(ByteString message); |
129 | | |
130 | | SourcePosition position() const; |
131 | | |
132 | | ParserState m_parser_state; |
133 | | }; |
134 | | |
135 | | } |