/src/serenity/Userland/Shell/Parser.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2020, the SerenityOS developers. |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include "AST.h" |
10 | | #include <AK/Function.h> |
11 | | #include <AK/RefPtr.h> |
12 | | #include <AK/String.h> |
13 | | #include <AK/StringBuilder.h> |
14 | | #include <AK/Vector.h> |
15 | | |
16 | | namespace Shell { |
17 | | |
18 | | /* Parser over a StringView of shell source text. It keeps a cursor (m_offset, m_line) into that view and exposes parse entry points that return AST nodes. Only the small inline helpers are defined in this header; everything else is merely declared here. */ class Parser { |
19 | | public: |
20 | | /* Stores the input view and the interactive-mode flag; all other members keep their in-class defaults. NOTE(review): the view is stored as-is, so the underlying buffer presumably has to outlive the Parser - confirm with callers. */ Parser(StringView input, bool interactive = false) |
21 | | : m_input(move(input)) |
22 | | , m_in_interactive_mode(interactive) |
23 | 395 | { |
24 | 395 | } |
25 | | /* Public parse entry points (declared only; definitions are not in this header). */ |
26 | | RefPtr<AST::Node> parse(); |
27 | | /// Parse the given string *as* an expression, |
28 | | /// that is, forcefully enclose it in double-quotes. |
29 | | RefPtr<AST::Node> parse_as_single_expression(); |
30 | | NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions(); |
31 | | /* Plain snapshot of a position: an offset together with its AST::Position::Line. Returned by save_offset(), which presumably captures the current cursor - definition not shown. */ |
32 | | struct SavedOffset { |
33 | | size_t offset; |
34 | | AST::Position::Line line; |
35 | | }; |
36 | | SavedOffset save_offset() const; |
37 | | |
38 | | private: |
39 | | /* Type of SequenceParseResult::decision. */ enum class ShouldReadMoreSequences { |
40 | | Yes, |
41 | | No, |
42 | | }; |
43 | | /* Argument type of parse_string_inner(). */ |
44 | | enum class StringEndCondition { |
45 | | DoubleQuote, |
46 | | Heredoc, |
47 | | }; |
48 | | /* Return type of parse_sequence(): the parsed entries, the recorded separator positions, and the read-more decision. */ |
49 | | struct SequenceParseResult { |
50 | | NonnullRefPtrVector<AST::Node> entries; |
51 | | Vector<AST::Position, 1> separator_positions; |
52 | | ShouldReadMoreSequences decision; |
53 | | }; |
54 | | /* Element type of m_heredoc_initiations. Both flags default to false. NOTE(review): `end` is presumably the key that terminates the heredoc body - confirm in the implementation. */ |
55 | | struct HeredocInitiationRecord { |
56 | | String end; |
57 | | RefPtr<AST::Heredoc> node; |
58 | | bool interpolate { false }; |
59 | | bool deindent { false }; |
60 | | }; |
61 | | /* Limit on nested rule depth; where and how it is enforced is not visible in this header. */ |
62 | | constexpr static size_t max_allowed_nested_rule_depth = 2048; |
63 | | /* Rule parsers, declared only. Their names line up with the rules of the_grammar in the disabled block at the end of this file. */ RefPtr<AST::Node> parse_toplevel(); |
64 | | SequenceParseResult parse_sequence(); |
65 | | RefPtr<AST::Node> parse_function_decl(); |
66 | | RefPtr<AST::Node> parse_and_logical_sequence(); |
67 | | RefPtr<AST::Node> parse_or_logical_sequence(); |
68 | | RefPtr<AST::Node> parse_variable_decls(); |
69 | | RefPtr<AST::Node> parse_pipe_sequence(); |
70 | | RefPtr<AST::Node> parse_command(); |
71 | | RefPtr<AST::Node> parse_control_structure(); |
72 | | RefPtr<AST::Node> parse_continuation_control(); |
73 | | RefPtr<AST::Node> parse_for_loop(); |
74 | | RefPtr<AST::Node> parse_loop_loop(); |
75 | | RefPtr<AST::Node> parse_if_expr(); |
76 | | RefPtr<AST::Node> parse_subshell(); |
77 | | RefPtr<AST::Node> parse_match_expr(); |
78 | | AST::MatchEntry parse_match_entry(); |
79 | | RefPtr<AST::Node> parse_match_pattern(); |
80 | | Optional<Regex<ECMA262>> parse_regex_pattern(); |
81 | | RefPtr<AST::Node> parse_redirection(); |
82 | | RefPtr<AST::Node> parse_list_expression(); |
83 | | RefPtr<AST::Node> parse_expression(); |
84 | | RefPtr<AST::Node> parse_string_composite(); |
85 | | RefPtr<AST::Node> parse_string(); |
86 | | RefPtr<AST::Node> parse_string_inner(StringEndCondition); |
87 | | RefPtr<AST::Node> parse_variable(); |
88 | | RefPtr<AST::Node> parse_variable_ref(); |
89 | | RefPtr<AST::Slice> parse_slice(); |
90 | | RefPtr<AST::Node> parse_evaluate(); |
91 | | RefPtr<AST::Node> parse_history_designator(); |
92 | | RefPtr<AST::Node> parse_comment(); |
93 | | RefPtr<AST::Node> parse_bareword(); |
94 | | RefPtr<AST::Node> parse_glob(); |
95 | | RefPtr<AST::Node> parse_brace_expansion(); |
96 | | RefPtr<AST::Node> parse_brace_expansion_spec(); |
97 | | RefPtr<AST::Node> parse_immediate_expression(); |
98 | | RefPtr<AST::Node> parse_heredoc_initiation_record(); |
99 | | bool parse_heredoc_entries(); |
100 | | /* Produces a NonnullRefPtr<A> from the given arguments; declared only, the definition is not in this header. */ |
101 | | template<typename A, typename... Args> |
102 | | NonnullRefPtr<A> create(Args&&... args); |
103 | | /* Installs the optional predicate consulted by at_end(); passing a null OwnPtr removes it, since at_end() null-checks m_end_condition first. */ |
104 | 13.4k | void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); } |
105 | | /* Returns true when an end condition is installed and reports true, or otherwise when m_offset has reached or passed the length of m_input. The predicate, if any, is invoked on every call. */ bool at_end() const |
106 | 2.45G | { |
107 | 2.45G | if (m_end_condition && (*m_end_condition)()) |
108 | 194k | return true; |
109 | 2.45G | return m_input.length() <= m_offset; |
110 | 2.45G | } |
111 | | char peek(); |
112 | | char consume(); |
113 | | bool expect(char); |
114 | | bool expect(StringView); |
115 | | bool next_is(StringView); |
116 | | /* Moves the cursor to the given offset and line position; no other state is touched. */ |
117 | | void restore_to(size_t offset, AST::Position::Line line) |
118 | 311M | { |
119 | 311M | m_offset = offset; |
120 | 311M | m_line = move(line); |
121 | 311M | } |
122 | | /* Returns a copy of the current line position. */ |
123 | 127M | AST::Position::Line line() const { return m_line; } |
124 | | |
125 | | StringView consume_while(Function<bool(char)>); |
126 | | |
127 | | /* Offset/line pair; return type of current_position(). */ struct Offset { |
128 | | size_t offset; |
129 | | AST::Position::Line line; |
130 | | }; |
131 | | /* Scope guard over two caller-owned stacks: the constructor appends `offset` and `line` to them, and the destructor pops both and VERIFYs that the popped values equal the ones this guard pushed. It does not move the parser cursor itself. */ struct ScopedOffset { |
132 | | ScopedOffset(Vector<size_t>& offsets, Vector<AST::Position::Line>& lines, size_t offset, size_t lineno, size_t linecol) |
133 | | : offsets(offsets) |
134 | | , lines(lines) |
135 | | , offset(offset) |
136 | | , line({ lineno, linecol }) |
137 | 120M | { |
138 | 120M | offsets.append(offset); |
139 | 120M | lines.append(line); |
140 | 120M | } |
141 | | ~ScopedOffset() |
142 | 120M | { |
143 | 120M | auto last = offsets.take_last(); |
144 | 120M | VERIFY(last == offset); |
145 | | |
146 | 0 | auto last_line = lines.take_last(); |
147 | 120M | VERIFY(last_line == line); |
148 | 120M | } |
149 | | /* References to the two stacks, followed by the values this guard pushed onto them. */ |
150 | | Vector<size_t>& offsets; |
151 | | Vector<AST::Position::Line>& lines; |
152 | | size_t offset; |
153 | | AST::Position::Line line; |
154 | | }; |
155 | | /* Convenience overload: moves the cursor to the offset/line recorded in the given ScopedOffset. */ |
156 | 7.47M | void restore_to(ScopedOffset const& offset) { restore_to(offset.offset, offset.line); } |
157 | | /* NOTE(review): push_start() presumably creates a ScopedOffset over m_rule_start_offsets and m_rule_start_lines at the current position - definition not shown, confirm in the implementation. */ |
158 | | OwnPtr<ScopedOffset> push_start(); |
159 | | Offset current_position(); |
160 | | /* The text being parsed and the cursor offset into it (starts at 0). */ |
161 | | StringView m_input; |
162 | | size_t m_offset { 0 }; |
163 | | /* Line position matching m_offset; starts at { 0, 0 }. */ |
164 | | AST::Position::Line m_line { 0, 0 }; |
165 | | /* Stacks of rule start positions; their element types match what ScopedOffset pushes and pops. */ |
166 | | Vector<size_t> m_rule_start_offsets; |
167 | | Vector<AST::Position::Line> m_rule_start_lines; |
168 | | /* Optional early-termination predicate (see at_end) followed by parser mode state; all flags start as false unless set by the constructor. */ |
169 | | OwnPtr<Function<bool()>> m_end_condition; |
170 | | Vector<HeredocInitiationRecord> m_heredoc_initiations; |
171 | | Vector<char> m_extra_chars_not_allowed_in_barewords; |
172 | | bool m_is_in_brace_expansion_spec { false }; |
173 | | bool m_continuation_controls_allowed { false }; |
174 | | bool m_in_interactive_mode { false }; |
175 | | }; |
176 | | |
177 | | #if 0 |
178 | | constexpr auto the_grammar = R"( |
179 | | toplevel :: sequence? |
180 | | |
181 | | sequence :: variable_decls? or_logical_sequence terminator sequence |
182 | | | variable_decls? or_logical_sequence '&' sequence |
183 | | | variable_decls? or_logical_sequence |
184 | | | variable_decls? function_decl (terminator sequence)? |
185 | | | variable_decls? terminator sequence |
186 | | |
187 | | function_decl :: identifier '(' (ws* identifier)* ')' ws* '{' [!c] toplevel '}' |
188 | | |
189 | | or_logical_sequence :: and_logical_sequence '|' '|' and_logical_sequence |
190 | | | and_logical_sequence |
191 | | |
192 | | and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence |
193 | | | pipe_sequence |
194 | | |
195 | | terminator :: ';' |
196 | | | '\n' [?!heredoc_stack.is_empty] heredoc_entries |
197 | | |
198 | | heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries] |
199 | | |
200 | | variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '* |
201 | | | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '* |
202 | | |
203 | | pipe_sequence :: command '|' pipe_sequence |
204 | | | command |
205 | | | control_structure '|' pipe_sequence |
206 | | | control_structure |
207 | | |
208 | | control_structure[c] :: for_expr |
209 | | | loop_expr |
210 | | | if_expr |
211 | | | subshell |
212 | | | match_expr |
213 | | | ?c: continuation_control |
214 | | |
215 | | continuation_control :: 'break' |
216 | | | 'continue' |
217 | | |
218 | | for_expr :: 'for' ws+ (('index' ' '+ identifier ' '+)? identifier ' '+ 'in' ws*)? expression ws+ '{' [c] toplevel '}' |
219 | | |
220 | | loop_expr :: 'loop' ws* '{' [c] toplevel '}' |
221 | | |
222 | | if_expr :: 'if' ws+ or_logical_sequence ws+ '{' toplevel '}' else_clause? |
223 | | |
224 | | else_clause :: 'else' '{' toplevel '}' |
225 | | | 'else' if_expr |
226 | | |
227 | | subshell :: '{' toplevel '}' |
228 | | |
229 | | match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}' |
230 | | |
231 | | match_entry :: match_pattern ws* ('as' identifier_list)? '{' toplevel '}' |
232 | | | regex_pattern ws* '{' toplevel '}' |
233 | | |
234 | | identifier_list :: '(' (identifier ws*)* ')' |
235 | | |
236 | | regex_pattern :: regex_pattern (ws* '|' ws* regex_pattern)* |
237 | | |
238 | | match_pattern :: expression (ws* '|' ws* expression)* |
239 | | |
240 | | regex_pattern :: '(?:' .* ')' { enclosed string must contain balanced parentheses } |
241 | | |
242 | | command :: redirection command |
243 | | | list_expression command? |
244 | | |
245 | | redirection :: number? '>'{1,2} ' '* string_composite |
246 | | | number? '<' ' '* string_composite |
247 | | | number? '>' '&' number |
248 | | | number? '>' '&' '-' |
249 | | |
250 | | list_expression :: ' '* expression (' '+ list_expression)? |
251 | | |
252 | | expression :: evaluate expression? |
253 | | | string_composite expression? |
254 | | | comment expression? |
255 | | | immediate_expression expression? |
256 | | | history_designator expression? |
257 | | | '(' list_expression ')' expression? |
258 | | |
259 | | evaluate :: '$' '(' pipe_sequence ')' |
260 | | | '$' [lookahead != '('] expression {eval / dynamic resolve} |
261 | | |
262 | | string_composite :: string string_composite? |
263 | | | variable string_composite? |
264 | | | bareword string_composite? |
265 | | | glob string_composite? |
266 | | | brace_expansion string_composite? |
267 | | | heredoc_initiator string_composite? {append to heredoc_entries} |
268 | | |
269 | | heredoc_initiator :: '<' '<' '-' bareword {*bareword, interpolate, no deindent} |
270 | | | '<' '<' '-' "'" [^']* "'" {*string, no interpolate, no deindent} |
271 | | | '<' '<' '~' bareword {*bareword, interpolate, deindent} |
272 | | | '<' '<' '~' "'" [^']* "'" {*string, no interpolate, deindent} |
273 | | |
274 | | string :: '"' dquoted_string_inner '"' |
275 | | | "'" [^']* "'" |
276 | | |
277 | | dquoted_string_inner :: '\' . dquoted_string_inner? {concat} |
278 | | | variable dquoted_string_inner? {compose} |
279 | | | . dquoted_string_inner? |
280 | | | '\' 'x' xdigit*2 dquoted_string_inner? |
281 | | | '\' 'u' xdigit*8 dquoted_string_inner? |
282 | | | '\' [abefrnt] dquoted_string_inner? |
283 | | |
284 | | variable :: variable_ref slice? |
285 | | |
286 | | variable_ref :: '$' identifier |
287 | | | '$' '$' |
288 | | | '$' '?' |
289 | | | '$' '*' |
290 | | | '$' '#' |
291 | | | ... |
292 | | |
293 | | slice :: '[' brace_expansion_spec ']' |
294 | | |
295 | | comment :: '#' [^\n]* |
296 | | |
297 | | immediate_expression :: '$' '{' immediate_function expression* '}' |
298 | | |
299 | | immediate_function :: identifier { predetermined list of names, see Shell.h:ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS } |
300 | | |
301 | | history_designator :: '!' event_selector (':' word_selector_composite)? |
302 | | |
303 | | event_selector :: '!' {== '-0'} |
304 | | | '?' bareword '?' |
305 | | | bareword {number: index, otherwise: lookup} |
306 | | |
307 | | word_selector_composite :: word_selector ('-' word_selector)? |
308 | | |
309 | | word_selector :: number |
310 | | | '^' {== 0} |
311 | | | '$' {== end} |
312 | | |
313 | | bareword :: [^"'*$&#|()[\]{} ?;<>] bareword? |
314 | | | '\' [^"'*$&#|()[\]{} ?;<>] bareword? |
315 | | |
316 | | bareword_with_tilde_expansion :: '~' bareword? |
317 | | |
318 | | glob :: [*?] bareword? |
319 | | | bareword [*?] |
320 | | |
321 | | brace_expansion :: '{' brace_expansion_spec '}' |
322 | | |
323 | | brace_expansion_spec :: expression? (',' expression?)* |
324 | | | expression '..' expression |
325 | | )"; |
326 | | #endif |
327 | | |
328 | | } |