/src/serenity/Userland/Libraries/LibShell/PosixLexer.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <AK/GenericLexer.h> |
10 | | #include <AK/Queue.h> |
11 | | #include <AK/String.h> |
12 | | #include <AK/TemporaryChange.h> |
13 | | #include <AK/Variant.h> |
14 | | #include <AK/Vector.h> |
15 | | #include <LibShell/AST.h> |
16 | | |
17 | | namespace Shell::Posix { |
18 | | |
// Identifies a lexer rule ("reduction"). Lexer declares one reduce_*() member
// for every enumerator except None. Both State::previous_reduction and
// Lexer::m_next_reduction are initialised to Start.
enum class Reduction {
    None,
    End,
    Operator,
    Comment,
    SingleQuotedString,
    DoubleQuotedString,
    Expansion,
    CommandExpansion,
    Start,
    ArithmeticExpansion,
    SpecialParameterExpansion,
    ParameterExpansion,
    CommandOrArithmeticSubstitutionExpansion,
    ExtendedParameterExpansion,

    // Separate rule, not used by the main flow.
    HeredocContents,
};
38 | | |
39 | | struct ExpansionRange { |
40 | | size_t start; |
41 | | size_t length; |
42 | | }; |
43 | | |
44 | | struct ParameterExpansion { |
45 | | StringBuilder parameter; |
46 | | ExpansionRange range; |
47 | | }; |
48 | | |
49 | | struct CommandExpansion { |
50 | | StringBuilder command; |
51 | | ExpansionRange range; |
52 | | }; |
53 | | |
54 | | struct ArithmeticExpansion { |
55 | | String expression; |
56 | | StringBuilder value; |
57 | | ExpansionRange range; |
58 | | }; |
59 | | |
60 | | using Expansion = Variant<ParameterExpansion, CommandExpansion, ArithmeticExpansion>; |
61 | | |
62 | | struct ResolvedParameterExpansion { |
63 | | String parameter; |
64 | | String argument; |
65 | | ExpansionRange range; |
66 | | enum class Op { |
67 | | UseDefaultValue, // ${parameter:-word} |
68 | | AssignDefaultValue, // ${parameter:=word} |
69 | | IndicateErrorIfEmpty, // ${parameter:?word} |
70 | | UseAlternativeValue, // ${parameter:+word} |
71 | | UseDefaultValueIfUnset, // ${parameter-default} |
72 | | AssignDefaultValueIfUnset, // ${parameter=default} |
73 | | IndicateErrorIfUnset, // ${parameter?default} |
74 | | UseAlternativeValueIfUnset, // ${parameter+default} |
75 | | RemoveLargestSuffixByPattern, // ${parameter%%pattern} |
76 | | RemoveLargestPrefixByPattern, // ${parameter##pattern} |
77 | | RemoveSmallestSuffixByPattern, // ${parameter%pattern} |
78 | | RemoveSmallestPrefixByPattern, // ${parameter#pattern} |
79 | | StringLength, // ${#parameter} |
80 | | GetPositionalParameter, // ${parameter} |
81 | | GetVariable, // ${parameter} |
82 | | GetLastBackgroundPid, // $! |
83 | | GetPositionalParameterList, // $* |
84 | | GetCurrentOptionFlags, // $- |
85 | | GetPositionalParameterCount, // $# |
86 | | GetLastExitStatus, // $? |
87 | | GetPositionalParameterListAsString, // $@ |
88 | | GetShellProcessId, // $$ |
89 | | } op; |
90 | | |
91 | | enum class Expand { |
92 | | Nothing, |
93 | | Word, |
94 | | } expand { Expand::Nothing }; |
95 | | |
96 | | ByteString to_byte_string() const |
97 | 0 | { |
98 | 0 | StringBuilder builder; |
99 | 0 | builder.append("{"sv); |
100 | 0 | switch (op) { |
101 | 0 | case Op::UseDefaultValue: |
102 | 0 | builder.append("UseDefaultValue"sv); |
103 | 0 | break; |
104 | 0 | case Op::AssignDefaultValue: |
105 | 0 | builder.append("AssignDefaultValue"sv); |
106 | 0 | break; |
107 | 0 | case Op::IndicateErrorIfEmpty: |
108 | 0 | builder.append("IndicateErrorIfEmpty"sv); |
109 | 0 | break; |
110 | 0 | case Op::UseAlternativeValue: |
111 | 0 | builder.append("UseAlternativeValue"sv); |
112 | 0 | break; |
113 | 0 | case Op::UseDefaultValueIfUnset: |
114 | 0 | builder.append("UseDefaultValueIfUnset"sv); |
115 | 0 | break; |
116 | 0 | case Op::AssignDefaultValueIfUnset: |
117 | 0 | builder.append("AssignDefaultValueIfUnset"sv); |
118 | 0 | break; |
119 | 0 | case Op::IndicateErrorIfUnset: |
120 | 0 | builder.append("IndicateErrorIfUnset"sv); |
121 | 0 | break; |
122 | 0 | case Op::UseAlternativeValueIfUnset: |
123 | 0 | builder.append("UseAlternativeValueIfUnset"sv); |
124 | 0 | break; |
125 | 0 | case Op::RemoveLargestSuffixByPattern: |
126 | 0 | builder.append("RemoveLargestSuffixByPattern"sv); |
127 | 0 | break; |
128 | 0 | case Op::RemoveLargestPrefixByPattern: |
129 | 0 | builder.append("RemoveLargestPrefixByPattern"sv); |
130 | 0 | break; |
131 | 0 | case Op::RemoveSmallestSuffixByPattern: |
132 | 0 | builder.append("RemoveSmallestSuffixByPattern"sv); |
133 | 0 | break; |
134 | 0 | case Op::RemoveSmallestPrefixByPattern: |
135 | 0 | builder.append("RemoveSmallestPrefixByPattern"sv); |
136 | 0 | break; |
137 | 0 | case Op::StringLength: |
138 | 0 | builder.append("StringLength"sv); |
139 | 0 | break; |
140 | 0 | case Op::GetPositionalParameter: |
141 | 0 | builder.append("GetPositionalParameter"sv); |
142 | 0 | break; |
143 | 0 | case Op::GetLastBackgroundPid: |
144 | 0 | builder.append("GetLastBackgroundPid"sv); |
145 | 0 | break; |
146 | 0 | case Op::GetPositionalParameterList: |
147 | 0 | builder.append("GetPositionalParameterList"sv); |
148 | 0 | break; |
149 | 0 | case Op::GetCurrentOptionFlags: |
150 | 0 | builder.append("GetCurrentOptionFlags"sv); |
151 | 0 | break; |
152 | 0 | case Op::GetPositionalParameterCount: |
153 | 0 | builder.append("GetPositionalParameterCount"sv); |
154 | 0 | break; |
155 | 0 | case Op::GetLastExitStatus: |
156 | 0 | builder.append("GetLastExitStatus"sv); |
157 | 0 | break; |
158 | 0 | case Op::GetPositionalParameterListAsString: |
159 | 0 | builder.append("GetPositionalParameterListAsString"sv); |
160 | 0 | break; |
161 | 0 | case Op::GetShellProcessId: |
162 | 0 | builder.append("GetShellProcessId"sv); |
163 | 0 | break; |
164 | 0 | case Op::GetVariable: |
165 | 0 | builder.append("GetVariable"sv); |
166 | 0 | break; |
167 | 0 | } |
168 | 0 | builder.append(" "sv); |
169 | 0 | builder.append(parameter); |
170 | 0 | builder.append(" ("sv); |
171 | 0 | builder.append(argument); |
172 | 0 | builder.append(")"sv); |
173 | 0 | builder.append("}"sv); |
174 | 0 | return builder.to_byte_string(); |
175 | 0 | } |
176 | | }; |
177 | | |
178 | | struct ResolvedCommandExpansion { |
179 | | RefPtr<AST::Node> command; |
180 | | ExpansionRange range; |
181 | | }; |
182 | | |
183 | | struct ResolvedArithmeticExpansion { |
184 | | String source_expression; |
185 | | ExpansionRange range; |
186 | | }; |
187 | | |
188 | | using ResolvedExpansion = Variant<ResolvedParameterExpansion, ResolvedCommandExpansion, ResolvedArithmeticExpansion>; |
189 | | |
190 | | struct HeredocEntry { |
191 | | String key; |
192 | | bool allow_interpolation; |
193 | | bool dedent; |
194 | | }; |
195 | | |
196 | | struct State { |
197 | | StringBuilder buffer {}; |
198 | | Reduction previous_reduction { Reduction::Start }; |
199 | | bool escaping { false }; |
200 | | bool in_skip_mode { false }; |
201 | | AST::Position position { |
202 | | .start_offset = 0, |
203 | | .end_offset = 0, |
204 | | .start_line = { |
205 | | .line_number = 0, |
206 | | .line_column = 0, |
207 | | }, |
208 | | .end_line = { |
209 | | .line_number = 0, |
210 | | .line_column = 0, |
211 | | }, |
212 | | }; |
213 | | Vector<Expansion> expansions {}; |
214 | | Vector<HeredocEntry> heredoc_entries {}; |
215 | | bool on_new_line { true }; |
216 | | }; |
217 | | |
218 | | struct Token { |
219 | | enum class Type { |
220 | | Eof, |
221 | | Newline, |
222 | | Continuation, |
223 | | Token, |
224 | | And, |
225 | | Pipe, |
226 | | OpenParen, |
227 | | CloseParen, |
228 | | Great, |
229 | | Less, |
230 | | AndIf, |
231 | | OrIf, |
232 | | DoubleSemicolon, |
233 | | DoubleLess, |
234 | | DoubleGreat, |
235 | | LessAnd, |
236 | | GreatAnd, |
237 | | LessGreat, |
238 | | DoubleLessDash, |
239 | | Clobber, |
240 | | Semicolon, |
241 | | HeredocContents, |
242 | | |
243 | | // Not produced by this lexer, but generated in later stages. |
244 | | AssignmentWord, |
245 | | ListAssignmentWord, |
246 | | Bang, |
247 | | Case, |
248 | | CloseBrace, |
249 | | Do, |
250 | | Done, |
251 | | Elif, |
252 | | Else, |
253 | | Esac, |
254 | | Fi, |
255 | | For, |
256 | | If, |
257 | | In, |
258 | | IoNumber, |
259 | | OpenBrace, |
260 | | Then, |
261 | | Until, |
262 | | VariableName, |
263 | | While, |
264 | | Word, |
265 | | }; |
266 | | |
267 | | Type type; |
268 | | String value; |
269 | | Optional<AST::Position> position; |
270 | | Vector<Expansion> expansions; |
271 | | Vector<ResolvedExpansion> resolved_expansions {}; |
272 | | StringView original_text; |
273 | | Optional<String> relevant_heredoc_key {}; |
274 | | bool could_be_start_of_a_simple_command { false }; |
275 | | |
276 | | static ErrorOr<Vector<Token>> maybe_from_state(State const& state) |
277 | 12.7M | { |
278 | 12.7M | if (state.buffer.is_empty() || state.buffer.string_view().trim_whitespace().is_empty()) |
279 | 5.51M | return Vector<Token> {}; |
280 | | |
281 | 7.22M | auto token = Token { |
282 | 7.22M | .type = Type::Token, |
283 | 7.22M | .value = TRY(state.buffer.to_string()), |
284 | 0 | .position = state.position, |
285 | 7.22M | .expansions = state.expansions, |
286 | 7.22M | .original_text = {}, |
287 | 7.22M | }; |
288 | 0 | return Vector<Token> { move(token) }; |
289 | 7.22M | } |
290 | | |
291 | | static Optional<Token::Type> operator_from_name(StringView name) |
292 | 81.4M | { |
293 | 81.4M | if (name == "&&"sv) |
294 | 359k | return Token::Type::AndIf; |
295 | 81.0M | if (name == "||"sv) |
296 | 1.98M | return Token::Type::OrIf; |
297 | 79.0M | if (name == ";;"sv) |
298 | 22.9k | return Token::Type::DoubleSemicolon; |
299 | 79.0M | if (name == "<<"sv) |
300 | 2.29M | return Token::Type::DoubleLess; |
301 | 76.7M | if (name == ">>"sv) |
302 | 34.5k | return Token::Type::DoubleGreat; |
303 | 76.7M | if (name == "<&"sv) |
304 | 199k | return Token::Type::LessAnd; |
305 | 76.5M | if (name == ">&"sv) |
306 | 560k | return Token::Type::GreatAnd; |
307 | 75.9M | if (name == "<>"sv) |
308 | 12.4k | return Token::Type::LessGreat; |
309 | 75.9M | if (name == "<<-"sv) |
310 | 10.8k | return Token::Type::DoubleLessDash; |
311 | 75.9M | if (name == ">|"sv) |
312 | 312 | return Token::Type::Clobber; |
313 | 75.9M | if (name == ";"sv) |
314 | 2.55M | return Token::Type::Semicolon; |
315 | 73.4M | if (name == "&"sv) |
316 | 2.10M | return Token::Type::And; |
317 | 71.3M | if (name == "|"sv) |
318 | 3.35M | return Token::Type::Pipe; |
319 | 67.9M | if (name == ">"sv) |
320 | 2.22M | return Token::Type::Great; |
321 | 65.7M | if (name == "<"sv) |
322 | 2.50M | return Token::Type::Less; |
323 | 63.2M | if (name == "\n"sv) |
324 | 0 | return Token::Type::Newline; |
325 | 63.2M | if (name == "("sv) |
326 | 4.31M | return Token::Type::OpenParen; |
327 | 58.8M | if (name == "{"sv) |
328 | 2.38M | return Token::Type::OpenBrace; |
329 | 56.5M | if (name == ")"sv) |
330 | 1.73M | return Token::Type::CloseParen; |
331 | 54.7M | if (name == "}"sv) |
332 | 580k | return Token::Type::CloseBrace; |
333 | | |
334 | 54.1M | return {}; |
335 | 54.7M | } |
336 | | |
337 | | static ErrorOr<Vector<Token>> operators_from(State const& state) |
338 | 8.46M | { |
339 | 8.46M | auto name = TRY(state.buffer.to_string()); |
340 | 0 | auto type = operator_from_name(name); |
341 | 8.46M | if (!type.has_value()) |
342 | 0 | return Vector<Token> {}; |
343 | | |
344 | 8.46M | return Vector { |
345 | 8.46M | Token { |
346 | 8.46M | .type = *type, |
347 | 8.46M | .value = name, |
348 | 8.46M | .position = state.position, |
349 | 8.46M | .expansions = {}, |
350 | 8.46M | .original_text = {}, |
351 | 8.46M | } |
352 | 8.46M | }; |
353 | 8.46M | } |
354 | | |
355 | | static Token eof() |
356 | 1.12M | { |
357 | 1.12M | return { |
358 | 1.12M | .type = Type::Eof, |
359 | 1.12M | .value = {}, |
360 | 1.12M | .position = {}, |
361 | 1.12M | .expansions = {}, |
362 | 1.12M | .original_text = {}, |
363 | 1.12M | }; |
364 | 1.12M | } |
365 | | |
366 | | static Token newline() |
367 | 1.99M | { |
368 | 1.99M | return { |
369 | 1.99M | .type = Type::Newline, |
370 | 1.99M | .value = "\n"_string, |
371 | 1.99M | .position = {}, |
372 | 1.99M | .expansions = {}, |
373 | 1.99M | .original_text = {}, |
374 | 1.99M | }; |
375 | 1.99M | } |
376 | | |
377 | | static Token continuation(char expected) |
378 | 11.5k | { |
379 | 11.5k | return { |
380 | 11.5k | .type = Type::Continuation, |
381 | 11.5k | .value = String::from_code_point(expected), |
382 | 11.5k | .position = {}, |
383 | 11.5k | .expansions = {}, |
384 | 11.5k | .original_text = {}, |
385 | 11.5k | }; |
386 | 11.5k | } |
387 | | |
388 | | static Token continuation(String expected) |
389 | 13.0k | { |
390 | 13.0k | return { |
391 | 13.0k | .type = Type::Continuation, |
392 | 13.0k | .value = move(expected), |
393 | 13.0k | .position = {}, |
394 | 13.0k | .expansions = {}, |
395 | 13.0k | .original_text = {}, |
396 | 13.0k | }; |
397 | 13.0k | } |
398 | | |
399 | | StringView type_name() const; |
400 | | }; |
401 | | |
402 | | class Lexer { |
403 | | public: |
404 | | explicit Lexer(StringView input) |
405 | 554k | : m_lexer(input) |
406 | 554k | { |
407 | 554k | } |
408 | | |
409 | | ErrorOr<Vector<Token>> batch_next(Optional<Reduction> starting_reduction = {}); |
410 | | |
411 | | struct HeredocKeyResult { |
412 | | String key; |
413 | | bool allow_interpolation; |
414 | | }; |
415 | | |
416 | | static HeredocKeyResult process_heredoc_key(Token const&); |
417 | | |
418 | | private: |
419 | | struct ReductionResult { |
420 | | Vector<Token> tokens; |
421 | | Reduction next_reduction { Reduction::None }; |
422 | | }; |
423 | | |
424 | | ErrorOr<ReductionResult> reduce(Reduction); |
425 | | ErrorOr<ReductionResult> reduce_end(); |
426 | | ErrorOr<ReductionResult> reduce_operator(); |
427 | | ErrorOr<ReductionResult> reduce_comment(); |
428 | | ErrorOr<ReductionResult> reduce_single_quoted_string(); |
429 | | ErrorOr<ReductionResult> reduce_double_quoted_string(); |
430 | | ErrorOr<ReductionResult> reduce_expansion(); |
431 | | ErrorOr<ReductionResult> reduce_command_expansion(); |
432 | | ErrorOr<ReductionResult> reduce_start(); |
433 | | ErrorOr<ReductionResult> reduce_arithmetic_expansion(); |
434 | | ErrorOr<ReductionResult> reduce_special_parameter_expansion(); |
435 | | ErrorOr<ReductionResult> reduce_parameter_expansion(); |
436 | | ErrorOr<ReductionResult> reduce_command_or_arithmetic_substitution_expansion(); |
437 | | ErrorOr<ReductionResult> reduce_extended_parameter_expansion(); |
438 | | ErrorOr<ReductionResult> reduce_heredoc_contents(); |
439 | | |
440 | | struct SkipTokens { |
441 | | explicit SkipTokens(Lexer& lexer) |
442 | 144k | : m_state_change(lexer.m_state, lexer.m_state) |
443 | 144k | { |
444 | 144k | lexer.m_state.in_skip_mode = true; |
445 | 144k | } |
446 | | |
447 | | TemporaryChange<State> m_state_change; |
448 | | }; |
449 | | |
450 | 144k | SkipTokens switch_to_skip_mode() { return SkipTokens { *this }; } |
451 | | |
452 | | char consume(); |
453 | | bool consume_specific(char); |
454 | | void reconsume(StringView); |
455 | | ExpansionRange range(ssize_t offset = 0) const; |
456 | | |
457 | | GenericLexer m_lexer; |
458 | | State m_state; |
459 | | Reduction m_next_reduction { Reduction::Start }; |
460 | | }; |
461 | | |
462 | | } |