Coverage Report

Created: 2025-03-04 07:22

/src/serenity/Userland/Libraries/LibShell/PosixLexer.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#pragma once
8
9
#include <AK/GenericLexer.h>
10
#include <AK/Queue.h>
11
#include <AK/String.h>
12
#include <AK/TemporaryChange.h>
13
#include <AK/Variant.h>
14
#include <AK/Vector.h>
15
#include <LibShell/AST.h>
16
17
namespace Shell::Posix {
18
19
// Names the reduction rules the Lexer can apply.
// Every enumerator except None has a matching Lexer::reduce_*() handler declared further down in
// this header; Lexer::reduce(Reduction) takes one of these values, and both State and Lexer start
// out at Reduction::Start.
enum class Reduction {
20
    None,
21
    End,
22
    Operator,
23
    Comment,
24
    SingleQuotedString,
25
    DoubleQuotedString,
26
    Expansion,
27
    CommandExpansion,
28
    Start,
29
    ArithmeticExpansion,
30
    SpecialParameterExpansion,
31
    ParameterExpansion,
32
    CommandOrArithmeticSubstitutionExpansion,
33
    ExtendedParameterExpansion,
34
35
    // Separate rule, not used by the main flow.
36
    HeredocContents,
37
};
38
39
// A span described by a start offset and a length.
// Both fields default to zero so that a default-initialized ExpansionRange is a well-defined empty
// span instead of carrying indeterminate values. The type stays an aggregate, so positional and
// designated initialization keep working unchanged.
// NOTE(review): what the offsets are relative to is decided by the producers (e.g. Lexer::range());
// confirm there before relying on a particular base.
struct ExpansionRange {
    size_t start { 0 };
    size_t length { 0 };
};
43
44
// A parameter expansion as recorded in the Expansion variant: the parameter text, held in a
// StringBuilder so it can be built up incrementally, plus the range the expansion covers.
// ResolvedParameterExpansion is the counterpart stored in Token::resolved_expansions.
struct ParameterExpansion {
45
    StringBuilder parameter;
46
    ExpansionRange range;
47
};
48
49
// A command expansion as recorded in the Expansion variant: the command text, held in a
// StringBuilder, plus the range the expansion covers.
// ResolvedCommandExpansion is the counterpart that holds an AST node instead of text.
struct CommandExpansion {
50
    StringBuilder command;
51
    ExpansionRange range;
52
};
53
54
// An arithmetic expansion as recorded in the Expansion variant.
// NOTE(review): the split between `expression` (a finished String) and `value` (a StringBuilder)
// is not visible from this header; presumably `value` accumulates text while lexing and
// `expression` holds the final source. Confirm in the lexer implementation.
struct ArithmeticExpansion {
55
    String expression;
56
    StringBuilder value;
57
    ExpansionRange range;
58
};
59
60
// One expansion of any of the three kinds above; State::expansions and Token::expansions are vectors of this type.
using Expansion = Variant<ParameterExpansion, CommandExpansion, ArithmeticExpansion>;
61
62
struct ResolvedParameterExpansion {
63
    String parameter;
64
    String argument;
65
    ExpansionRange range;
66
    enum class Op {
67
        UseDefaultValue,                    // ${parameter:-word}
68
        AssignDefaultValue,                 // ${parameter:=word}
69
        IndicateErrorIfEmpty,               // ${parameter:?word}
70
        UseAlternativeValue,                // ${parameter:+word}
71
        UseDefaultValueIfUnset,             // ${parameter-default}
72
        AssignDefaultValueIfUnset,          // ${parameter=default}
73
        IndicateErrorIfUnset,               // ${parameter?default}
74
        UseAlternativeValueIfUnset,         // ${parameter+default}
75
        RemoveLargestSuffixByPattern,       // ${parameter%%pattern}
76
        RemoveLargestPrefixByPattern,       // ${parameter##pattern}
77
        RemoveSmallestSuffixByPattern,      // ${parameter%pattern}
78
        RemoveSmallestPrefixByPattern,      // ${parameter#pattern}
79
        StringLength,                       // ${#parameter}
80
        GetPositionalParameter,             // ${parameter}
81
        GetVariable,                        // ${parameter}
82
        GetLastBackgroundPid,               // $!
83
        GetPositionalParameterList,         // $*
84
        GetCurrentOptionFlags,              // $-
85
        GetPositionalParameterCount,        // $#
86
        GetLastExitStatus,                  // $?
87
        GetPositionalParameterListAsString, // $@
88
        GetShellProcessId,                  // $$
89
    } op;
90
91
    enum class Expand {
92
        Nothing,
93
        Word,
94
    } expand { Expand::Nothing };
95
96
    ByteString to_byte_string() const
97
0
    {
98
0
        StringBuilder builder;
99
0
        builder.append("{"sv);
100
0
        switch (op) {
101
0
        case Op::UseDefaultValue:
102
0
            builder.append("UseDefaultValue"sv);
103
0
            break;
104
0
        case Op::AssignDefaultValue:
105
0
            builder.append("AssignDefaultValue"sv);
106
0
            break;
107
0
        case Op::IndicateErrorIfEmpty:
108
0
            builder.append("IndicateErrorIfEmpty"sv);
109
0
            break;
110
0
        case Op::UseAlternativeValue:
111
0
            builder.append("UseAlternativeValue"sv);
112
0
            break;
113
0
        case Op::UseDefaultValueIfUnset:
114
0
            builder.append("UseDefaultValueIfUnset"sv);
115
0
            break;
116
0
        case Op::AssignDefaultValueIfUnset:
117
0
            builder.append("AssignDefaultValueIfUnset"sv);
118
0
            break;
119
0
        case Op::IndicateErrorIfUnset:
120
0
            builder.append("IndicateErrorIfUnset"sv);
121
0
            break;
122
0
        case Op::UseAlternativeValueIfUnset:
123
0
            builder.append("UseAlternativeValueIfUnset"sv);
124
0
            break;
125
0
        case Op::RemoveLargestSuffixByPattern:
126
0
            builder.append("RemoveLargestSuffixByPattern"sv);
127
0
            break;
128
0
        case Op::RemoveLargestPrefixByPattern:
129
0
            builder.append("RemoveLargestPrefixByPattern"sv);
130
0
            break;
131
0
        case Op::RemoveSmallestSuffixByPattern:
132
0
            builder.append("RemoveSmallestSuffixByPattern"sv);
133
0
            break;
134
0
        case Op::RemoveSmallestPrefixByPattern:
135
0
            builder.append("RemoveSmallestPrefixByPattern"sv);
136
0
            break;
137
0
        case Op::StringLength:
138
0
            builder.append("StringLength"sv);
139
0
            break;
140
0
        case Op::GetPositionalParameter:
141
0
            builder.append("GetPositionalParameter"sv);
142
0
            break;
143
0
        case Op::GetLastBackgroundPid:
144
0
            builder.append("GetLastBackgroundPid"sv);
145
0
            break;
146
0
        case Op::GetPositionalParameterList:
147
0
            builder.append("GetPositionalParameterList"sv);
148
0
            break;
149
0
        case Op::GetCurrentOptionFlags:
150
0
            builder.append("GetCurrentOptionFlags"sv);
151
0
            break;
152
0
        case Op::GetPositionalParameterCount:
153
0
            builder.append("GetPositionalParameterCount"sv);
154
0
            break;
155
0
        case Op::GetLastExitStatus:
156
0
            builder.append("GetLastExitStatus"sv);
157
0
            break;
158
0
        case Op::GetPositionalParameterListAsString:
159
0
            builder.append("GetPositionalParameterListAsString"sv);
160
0
            break;
161
0
        case Op::GetShellProcessId:
162
0
            builder.append("GetShellProcessId"sv);
163
0
            break;
164
0
        case Op::GetVariable:
165
0
            builder.append("GetVariable"sv);
166
0
            break;
167
0
        }
168
0
        builder.append(" "sv);
169
0
        builder.append(parameter);
170
0
        builder.append(" ("sv);
171
0
        builder.append(argument);
172
0
        builder.append(")"sv);
173
0
        builder.append("}"sv);
174
0
        return builder.to_byte_string();
175
0
    }
176
};
177
178
// A command expansion in resolved form: the command is held as a (nullable) reference to an AST
// node rather than as text, together with the range the expansion covers.
struct ResolvedCommandExpansion {
179
    RefPtr<AST::Node> command;
180
    ExpansionRange range;
181
};
182
183
// An arithmetic expansion in resolved form: the source text of the expression and the range the
// expansion covers.
struct ResolvedArithmeticExpansion {
184
    String source_expression;
185
    ExpansionRange range;
186
};
187
188
// One resolved expansion of any of the three kinds above; Token::resolved_expansions is a vector of this type.
using ResolvedExpansion = Variant<ResolvedParameterExpansion, ResolvedCommandExpansion, ResolvedArithmeticExpansion>;
189
190
// Describes one here-document tracked in State::heredoc_entries.
// The bool members have no default initializers, so every creator must set them explicitly.
struct HeredocEntry {
191
    // The here-document's key.
    String key;
192
    // Same flag name as Lexer::HeredocKeyResult::allow_interpolation.
    bool allow_interpolation;
193
    // NOTE(review): presumably set for the `<<-` form (Token::Type::DoubleLessDash); confirm in the lexer implementation.
    bool dedent;
194
};
195
196
// The lexer's mutable state.
// Token::maybe_from_state() and Token::operators_from() build tokens from `buffer`, `position` and
// `expansions`; Lexer::SkipTokens hands the whole State to a TemporaryChange and then sets
// `in_skip_mode`.
struct State {
197
    // Text accumulated for the token currently being built.
    StringBuilder buffer {};
198
    Reduction previous_reduction { Reduction::Start };
199
    bool escaping { false };
200
    // Set to true by Lexer::SkipTokens.
    bool in_skip_mode { false };
201
    // Starts out fully zeroed (offsets, line numbers and columns).
    AST::Position position {
202
        .start_offset = 0,
203
        .end_offset = 0,
204
        .start_line = {
205
            .line_number = 0,
206
            .line_column = 0,
207
        },
208
        .end_line = {
209
            .line_number = 0,
210
            .line_column = 0,
211
        },
212
    };
213
    // Copied into the token by Token::maybe_from_state().
    Vector<Expansion> expansions {};
214
    Vector<HeredocEntry> heredoc_entries {};
215
    // Initially true.
    bool on_new_line { true };
216
};
217
218
struct Token {
219
    enum class Type {
220
        Eof,
221
        Newline,
222
        Continuation,
223
        Token,
224
        And,
225
        Pipe,
226
        OpenParen,
227
        CloseParen,
228
        Great,
229
        Less,
230
        AndIf,
231
        OrIf,
232
        DoubleSemicolon,
233
        DoubleLess,
234
        DoubleGreat,
235
        LessAnd,
236
        GreatAnd,
237
        LessGreat,
238
        DoubleLessDash,
239
        Clobber,
240
        Semicolon,
241
        HeredocContents,
242
243
        // Not produced by this lexer, but generated in later stages.
244
        AssignmentWord,
245
        ListAssignmentWord,
246
        Bang,
247
        Case,
248
        CloseBrace,
249
        Do,
250
        Done,
251
        Elif,
252
        Else,
253
        Esac,
254
        Fi,
255
        For,
256
        If,
257
        In,
258
        IoNumber,
259
        OpenBrace,
260
        Then,
261
        Until,
262
        VariableName,
263
        While,
264
        Word,
265
    };
266
267
    Type type;
268
    String value;
269
    Optional<AST::Position> position;
270
    Vector<Expansion> expansions;
271
    Vector<ResolvedExpansion> resolved_expansions {};
272
    StringView original_text;
273
    Optional<String> relevant_heredoc_key {};
274
    bool could_be_start_of_a_simple_command { false };
275
276
    static ErrorOr<Vector<Token>> maybe_from_state(State const& state)
277
12.7M
    {
278
12.7M
        if (state.buffer.is_empty() || state.buffer.string_view().trim_whitespace().is_empty())
279
5.51M
            return Vector<Token> {};
280
281
7.22M
        auto token = Token {
282
7.22M
            .type = Type::Token,
283
7.22M
            .value = TRY(state.buffer.to_string()),
284
0
            .position = state.position,
285
7.22M
            .expansions = state.expansions,
286
7.22M
            .original_text = {},
287
7.22M
        };
288
0
        return Vector<Token> { move(token) };
289
7.22M
    }
290
291
    static Optional<Token::Type> operator_from_name(StringView name)
292
81.4M
    {
293
81.4M
        if (name == "&&"sv)
294
359k
            return Token::Type::AndIf;
295
81.0M
        if (name == "||"sv)
296
1.98M
            return Token::Type::OrIf;
297
79.0M
        if (name == ";;"sv)
298
22.9k
            return Token::Type::DoubleSemicolon;
299
79.0M
        if (name == "<<"sv)
300
2.29M
            return Token::Type::DoubleLess;
301
76.7M
        if (name == ">>"sv)
302
34.5k
            return Token::Type::DoubleGreat;
303
76.7M
        if (name == "<&"sv)
304
199k
            return Token::Type::LessAnd;
305
76.5M
        if (name == ">&"sv)
306
560k
            return Token::Type::GreatAnd;
307
75.9M
        if (name == "<>"sv)
308
12.4k
            return Token::Type::LessGreat;
309
75.9M
        if (name == "<<-"sv)
310
10.8k
            return Token::Type::DoubleLessDash;
311
75.9M
        if (name == ">|"sv)
312
312
            return Token::Type::Clobber;
313
75.9M
        if (name == ";"sv)
314
2.55M
            return Token::Type::Semicolon;
315
73.4M
        if (name == "&"sv)
316
2.10M
            return Token::Type::And;
317
71.3M
        if (name == "|"sv)
318
3.35M
            return Token::Type::Pipe;
319
67.9M
        if (name == ">"sv)
320
2.22M
            return Token::Type::Great;
321
65.7M
        if (name == "<"sv)
322
2.50M
            return Token::Type::Less;
323
63.2M
        if (name == "\n"sv)
324
0
            return Token::Type::Newline;
325
63.2M
        if (name == "("sv)
326
4.31M
            return Token::Type::OpenParen;
327
58.8M
        if (name == "{"sv)
328
2.38M
            return Token::Type::OpenBrace;
329
56.5M
        if (name == ")"sv)
330
1.73M
            return Token::Type::CloseParen;
331
54.7M
        if (name == "}"sv)
332
580k
            return Token::Type::CloseBrace;
333
334
54.1M
        return {};
335
54.7M
    }
336
337
    static ErrorOr<Vector<Token>> operators_from(State const& state)
338
8.46M
    {
339
8.46M
        auto name = TRY(state.buffer.to_string());
340
0
        auto type = operator_from_name(name);
341
8.46M
        if (!type.has_value())
342
0
            return Vector<Token> {};
343
344
8.46M
        return Vector {
345
8.46M
            Token {
346
8.46M
                .type = *type,
347
8.46M
                .value = name,
348
8.46M
                .position = state.position,
349
8.46M
                .expansions = {},
350
8.46M
                .original_text = {},
351
8.46M
            }
352
8.46M
        };
353
8.46M
    }
354
355
    static Token eof()
356
1.12M
    {
357
1.12M
        return {
358
1.12M
            .type = Type::Eof,
359
1.12M
            .value = {},
360
1.12M
            .position = {},
361
1.12M
            .expansions = {},
362
1.12M
            .original_text = {},
363
1.12M
        };
364
1.12M
    }
365
366
    static Token newline()
367
1.99M
    {
368
1.99M
        return {
369
1.99M
            .type = Type::Newline,
370
1.99M
            .value = "\n"_string,
371
1.99M
            .position = {},
372
1.99M
            .expansions = {},
373
1.99M
            .original_text = {},
374
1.99M
        };
375
1.99M
    }
376
377
    static Token continuation(char expected)
378
11.5k
    {
379
11.5k
        return {
380
11.5k
            .type = Type::Continuation,
381
11.5k
            .value = String::from_code_point(expected),
382
11.5k
            .position = {},
383
11.5k
            .expansions = {},
384
11.5k
            .original_text = {},
385
11.5k
        };
386
11.5k
    }
387
388
    static Token continuation(String expected)
389
13.0k
    {
390
13.0k
        return {
391
13.0k
            .type = Type::Continuation,
392
13.0k
            .value = move(expected),
393
13.0k
            .position = {},
394
13.0k
            .expansions = {},
395
13.0k
            .original_text = {},
396
13.0k
        };
397
13.0k
    }
398
399
    StringView type_name() const;
400
};
401
402
// Lexer over a StringView input that produces Tokens in batches.
// It starts at Reduction::Start, keeps its working data in a State, and has one private reduce_*()
// handler per Reduction rule.
class Lexer {
403
public:
404
    // Wraps `input` in a GenericLexer; all other members keep their default initializers.
    explicit Lexer(StringView input)
405
554k
        : m_lexer(input)
406
554k
    {
407
554k
    }
408
409
    // Produces the next batch of tokens, optionally starting from a caller-chosen reduction.
    // NOTE(review): presumably falls back to m_next_reduction when none is given; confirm in the implementation.
    ErrorOr<Vector<Token>> batch_next(Optional<Reduction> starting_reduction = {});
410
411
    // Result of process_heredoc_key(): the key plus whether interpolation is allowed.
    struct HeredocKeyResult {
412
        String key;
413
        bool allow_interpolation;
414
    };
415
416
    static HeredocKeyResult process_heredoc_key(Token const&);
417
418
private:
419
    // What a reduction step yields: the tokens it produced and the reduction to apply next
    // (Reduction::None unless set).
    struct ReductionResult {
420
        Vector<Token> tokens;
421
        Reduction next_reduction { Reduction::None };
422
    };
423
424
    // NOTE(review): presumably dispatches to the matching reduce_*() handler below; confirm in the implementation.
    ErrorOr<ReductionResult> reduce(Reduction);
425
    ErrorOr<ReductionResult> reduce_end();
426
    ErrorOr<ReductionResult> reduce_operator();
427
    ErrorOr<ReductionResult> reduce_comment();
428
    ErrorOr<ReductionResult> reduce_single_quoted_string();
429
    ErrorOr<ReductionResult> reduce_double_quoted_string();
430
    ErrorOr<ReductionResult> reduce_expansion();
431
    ErrorOr<ReductionResult> reduce_command_expansion();
432
    ErrorOr<ReductionResult> reduce_start();
433
    ErrorOr<ReductionResult> reduce_arithmetic_expansion();
434
    ErrorOr<ReductionResult> reduce_special_parameter_expansion();
435
    ErrorOr<ReductionResult> reduce_parameter_expansion();
436
    ErrorOr<ReductionResult> reduce_command_or_arithmetic_substitution_expansion();
437
    ErrorOr<ReductionResult> reduce_extended_parameter_expansion();
438
    ErrorOr<ReductionResult> reduce_heredoc_contents();
439
440
    // Scope guard for skip mode: hands the lexer's State to a TemporaryChange, then sets in_skip_mode.
    // NOTE(review): TemporaryChange presumably restores the saved State when the guard is destroyed;
    // confirm in AK/TemporaryChange.h.
    struct SkipTokens {
441
        explicit SkipTokens(Lexer& lexer)
442
144k
            // Variable and new value are the same object, so construction itself changes nothing.
            : m_state_change(lexer.m_state, lexer.m_state)
443
144k
        {
444
144k
            lexer.m_state.in_skip_mode = true;
445
144k
        }
446
447
        TemporaryChange<State> m_state_change;
448
    };
449
450
144k
    // Returns a SkipTokens guard for this lexer; the caller is expected to keep it alive for as long as skip mode should last.
    SkipTokens switch_to_skip_mode() { return SkipTokens { *this }; }
451
452
    char consume();
453
    bool consume_specific(char);
454
    void reconsume(StringView);
455
    ExpansionRange range(ssize_t offset = 0) const;
456
457
    GenericLexer m_lexer;
458
    State m_state;
459
    Reduction m_next_reduction { Reduction::Start };
460
};
461
462
}