Coverage Report

Created: 2022-05-20 06:15

/src/serenity/Userland/Shell/Parser.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2020, the SerenityOS developers.
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#pragma once
8
9
#include "AST.h"
10
#include <AK/Function.h>
11
#include <AK/RefPtr.h>
12
#include <AK/String.h>
13
#include <AK/StringBuilder.h>
14
#include <AK/Vector.h>
15
16
namespace Shell {
17
18
class Parser {
19
public:
20
    Parser(StringView input, bool interactive = false)
21
        : m_input(move(input))
22
        , m_in_interactive_mode(interactive)
23
395
    {
24
395
    }
25
26
    RefPtr<AST::Node> parse();
27
    /// Parse the given string *as* an expression
28
    /// that is to forcefully enclose it in double-quotes.
29
    RefPtr<AST::Node> parse_as_single_expression();
30
    NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions();
31
32
    struct SavedOffset {
33
        size_t offset;
34
        AST::Position::Line line;
35
    };
36
    SavedOffset save_offset() const;
37
38
private:
39
    enum class ShouldReadMoreSequences {
40
        Yes,
41
        No,
42
    };
43
44
    enum class StringEndCondition {
45
        DoubleQuote,
46
        Heredoc,
47
    };
48
49
    struct SequenceParseResult {
50
        NonnullRefPtrVector<AST::Node> entries;
51
        Vector<AST::Position, 1> separator_positions;
52
        ShouldReadMoreSequences decision;
53
    };
54
55
    struct HeredocInitiationRecord {
56
        String end;
57
        RefPtr<AST::Heredoc> node;
58
        bool interpolate { false };
59
        bool deindent { false };
60
    };
61
62
    constexpr static size_t max_allowed_nested_rule_depth = 2048;
63
    RefPtr<AST::Node> parse_toplevel();
64
    SequenceParseResult parse_sequence();
65
    RefPtr<AST::Node> parse_function_decl();
66
    RefPtr<AST::Node> parse_and_logical_sequence();
67
    RefPtr<AST::Node> parse_or_logical_sequence();
68
    RefPtr<AST::Node> parse_variable_decls();
69
    RefPtr<AST::Node> parse_pipe_sequence();
70
    RefPtr<AST::Node> parse_command();
71
    RefPtr<AST::Node> parse_control_structure();
72
    RefPtr<AST::Node> parse_continuation_control();
73
    RefPtr<AST::Node> parse_for_loop();
74
    RefPtr<AST::Node> parse_loop_loop();
75
    RefPtr<AST::Node> parse_if_expr();
76
    RefPtr<AST::Node> parse_subshell();
77
    RefPtr<AST::Node> parse_match_expr();
78
    AST::MatchEntry parse_match_entry();
79
    RefPtr<AST::Node> parse_match_pattern();
80
    Optional<Regex<ECMA262>> parse_regex_pattern();
81
    RefPtr<AST::Node> parse_redirection();
82
    RefPtr<AST::Node> parse_list_expression();
83
    RefPtr<AST::Node> parse_expression();
84
    RefPtr<AST::Node> parse_string_composite();
85
    RefPtr<AST::Node> parse_string();
86
    RefPtr<AST::Node> parse_string_inner(StringEndCondition);
87
    RefPtr<AST::Node> parse_variable();
88
    RefPtr<AST::Node> parse_variable_ref();
89
    RefPtr<AST::Slice> parse_slice();
90
    RefPtr<AST::Node> parse_evaluate();
91
    RefPtr<AST::Node> parse_history_designator();
92
    RefPtr<AST::Node> parse_comment();
93
    RefPtr<AST::Node> parse_bareword();
94
    RefPtr<AST::Node> parse_glob();
95
    RefPtr<AST::Node> parse_brace_expansion();
96
    RefPtr<AST::Node> parse_brace_expansion_spec();
97
    RefPtr<AST::Node> parse_immediate_expression();
98
    RefPtr<AST::Node> parse_heredoc_initiation_record();
99
    bool parse_heredoc_entries();
100
101
    template<typename A, typename... Args>
102
    NonnullRefPtr<A> create(Args&&... args);
103
104
13.4k
    void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); }
105
    bool at_end() const
106
2.45G
    {
107
2.45G
        if (m_end_condition && (*m_end_condition)())
108
194k
            return true;
109
2.45G
        return m_input.length() <= m_offset;
110
2.45G
    }
111
    char peek();
112
    char consume();
113
    bool expect(char);
114
    bool expect(StringView);
115
    bool next_is(StringView);
116
117
    void restore_to(size_t offset, AST::Position::Line line)
118
311M
    {
119
311M
        m_offset = offset;
120
311M
        m_line = move(line);
121
311M
    }
122
123
127M
    AST::Position::Line line() const { return m_line; }
124
125
    StringView consume_while(Function<bool(char)>);
126
127
    struct Offset {
128
        size_t offset;
129
        AST::Position::Line line;
130
    };
131
    struct ScopedOffset {
132
        ScopedOffset(Vector<size_t>& offsets, Vector<AST::Position::Line>& lines, size_t offset, size_t lineno, size_t linecol)
133
            : offsets(offsets)
134
            , lines(lines)
135
            , offset(offset)
136
            , line({ lineno, linecol })
137
120M
        {
138
120M
            offsets.append(offset);
139
120M
            lines.append(line);
140
120M
        }
141
        ~ScopedOffset()
142
120M
        {
143
120M
            auto last = offsets.take_last();
144
120M
            VERIFY(last == offset);
145
146
0
            auto last_line = lines.take_last();
147
120M
            VERIFY(last_line == line);
148
120M
        }
149
150
        Vector<size_t>& offsets;
151
        Vector<AST::Position::Line>& lines;
152
        size_t offset;
153
        AST::Position::Line line;
154
    };
155
156
7.47M
    void restore_to(ScopedOffset const& offset) { restore_to(offset.offset, offset.line); }
157
158
    OwnPtr<ScopedOffset> push_start();
159
    Offset current_position();
160
161
    StringView m_input;
162
    size_t m_offset { 0 };
163
164
    AST::Position::Line m_line { 0, 0 };
165
166
    Vector<size_t> m_rule_start_offsets;
167
    Vector<AST::Position::Line> m_rule_start_lines;
168
169
    OwnPtr<Function<bool()>> m_end_condition;
170
    Vector<HeredocInitiationRecord> m_heredoc_initiations;
171
    Vector<char> m_extra_chars_not_allowed_in_barewords;
172
    bool m_is_in_brace_expansion_spec { false };
173
    bool m_continuation_controls_allowed { false };
174
    bool m_in_interactive_mode { false };
175
};
176
177
#if 0
178
constexpr auto the_grammar = R"(
179
toplevel :: sequence?
180
181
sequence :: variable_decls? or_logical_sequence terminator sequence
182
          | variable_decls? or_logical_sequence '&' sequence
183
          | variable_decls? or_logical_sequence
184
          | variable_decls? function_decl (terminator sequence)?
185
          | variable_decls? terminator sequence
186
187
function_decl :: identifier '(' (ws* identifier)* ')' ws* '{' [!c] toplevel '}'
188
189
or_logical_sequence :: and_logical_sequence '|' '|' and_logical_sequence
190
                     | and_logical_sequence
191
192
and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence
193
                      | pipe_sequence
194
195
terminator :: ';'
196
            | '\n' [?!heredoc_stack.is_empty] heredoc_entries
197
198
heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries]
199
200
variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '*
201
                | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '*
202
203
pipe_sequence :: command '|' pipe_sequence
204
               | command
205
               | control_structure '|' pipe_sequence
206
               | control_structure
207
208
control_structure[c] :: for_expr
209
                      | loop_expr
210
                      | if_expr
211
                      | subshell
212
                      | match_expr
213
                      | ?c: continuation_control
214
215
continuation_control :: 'break'
216
                      | 'continue'
217
218
for_expr :: 'for' ws+ (('index' ' '+ identifier ' '+)? identifier ' '+ 'in' ws*)? expression ws+ '{' [c] toplevel '}'
219
220
loop_expr :: 'loop' ws* '{' [c] toplevel '}'
221
222
if_expr :: 'if' ws+ or_logical_sequence ws+ '{' toplevel '}' else_clause?
223
224
else_clause :: else '{' toplevel '}'
225
             | else if_expr
226
227
subshell :: '{' toplevel '}'
228
229
match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'
230
231
match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}'
232
             | regex_pattern ws* '{' toplevel '}'
233
234
identifier_list :: '(' (identifier ws*)* ')'
235
236
regex_pattern :: regex_pattern (ws* '|' ws* regex_pattern)*
237
238
match_pattern          :: expression (ws* '|' ws* expression)*
239
240
regex_pattern :: '(?:' .* ')' { enclosed string must contain balanced parentheses }
241
242
command :: redirection command
243
         | list_expression command?
244
245
redirection :: number? '>'{1,2} ' '* string_composite
246
             | number? '<' ' '* string_composite
247
             | number? '>' '&' number
248
             | number? '>' '&' '-'
249
250
list_expression :: ' '* expression (' '+ list_expression)?
251
252
expression :: evaluate expression?
253
            | string_composite expression?
254
            | comment expression?
255
            | immediate_expression expression?
256
            | history_designator expression?
257
            | '(' list_expression ')' expression?
258
259
evaluate :: '$' '(' pipe_sequence ')'
260
          | '$' [lookahead != '('] expression          {eval / dynamic resolve}
261
262
string_composite :: string string_composite?
263
                  | variable string_composite?
264
                  | bareword string_composite?
265
                  | glob string_composite?
266
                  | brace_expansion string_composite?
267
                  | heredoc_initiator string_composite?    {append to heredoc_entries}
268
269
heredoc_initiator :: '<' '<' '-' bareword         {*bareword, interpolate, no deindent}
270
                   | '<' '<' '-' "'" [^']* "'"    {*string, no interpolate, no deindent}
271
                   | '<' '<' '~' bareword         {*bareword, interpolate, deindent}
272
                   | '<' '<' '~' "'" [^']* "'"    {*bareword, no interpolate, deindent}
273
274
string :: '"' dquoted_string_inner '"'
275
        | "'" [^']* "'"
276
277
dquoted_string_inner :: '\' . dquoted_string_inner?       {concat}
278
                      | variable dquoted_string_inner?    {compose}
279
                      | . dquoted_string_inner?
280
                      | '\' 'x' xdigit*2 dquoted_string_inner?
281
                      | '\' 'u' xdigit*8 dquoted_string_inner?
282
                      | '\' [abefrnt] dquoted_string_inner?
283
284
variable :: variable_ref slice?
285
286
variable_ref :: '$' identifier
287
          | '$' '$'
288
          | '$' '?'
289
          | '$' '*'
290
          | '$' '#'
291
          | ...
292
293
slice :: '[' brace_expansion_spec ']'
294
295
comment :: '#' [^\n]*
296
297
immediate_expression :: '$' '{' immediate_function expression* '}'
298
299
immediate_function :: identifier       { predetermined list of names, see Shell.h:ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS }
300
301
history_designator :: '!' event_selector (':' word_selector_composite)?
302
303
event_selector :: '!'                  {== '-0'}
304
                | '?' bareword '?'
305
                | bareword             {number: index, otherwise: lookup}
306
307
word_selector_composite :: word_selector ('-' word_selector)?
308
309
word_selector :: number
310
               | '^'                   {== 0}
311
               | '$'                   {== end}
312
313
bareword :: [^"'*$&#|()[\]{} ?;<>] bareword?
314
          | '\' [^"'*$&#|()[\]{} ?;<>] bareword?
315
316
bareword_with_tilde_expansion :: '~' bareword?
317
318
glob :: [*?] bareword?
319
      | bareword [*?]
320
321
brace_expansion :: '{' brace_expansion_spec '}'
322
323
brace_expansion_spec :: expression? (',' expression?)*
324
                      | expression '..' expression
325
)";
326
#endif
327
328
}