Coverage Report

Created: 2026-06-07 07:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibShell/PosixLexer.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/CharacterTypes.h>
8
#include <LibShell/PosixLexer.h>
9
10
static bool is_operator(StringView text)
11
8.33M
{
12
8.33M
    return Shell::Posix::Token::operator_from_name(text).has_value();
13
8.33M
}
14
15
static bool is_part_of_operator(StringView text, char ch)
16
74.1M
{
17
74.1M
    StringBuilder builder;
18
74.1M
    builder.append(text);
19
74.1M
    builder.append(ch);
20
21
74.1M
    return Shell::Posix::Token::operator_from_name(builder.string_view()).has_value();
22
74.1M
}
23
24
namespace Shell::Posix {
25
26
ErrorOr<Vector<Token>> Lexer::batch_next(Optional<Reduction> starting_reduction)
27
14.1M
{
28
14.1M
    if (starting_reduction.has_value())
29
16.4k
        m_next_reduction = *starting_reduction;
30
31
113M
    for (; m_next_reduction != Reduction::None;) {
32
113M
        auto result = TRY(reduce(m_next_reduction));
33
113M
        m_next_reduction = result.next_reduction;
34
113M
        if (!result.tokens.is_empty())
35
14.1M
            return result.tokens;
36
113M
    }
37
38
0
    return Vector<Token> {};
39
14.1M
}
40
41
ExpansionRange Lexer::range(ssize_t offset) const
42
7.97M
{
43
7.97M
    return {
44
7.97M
        m_state.position.end_offset - m_state.position.start_offset + offset,
45
7.97M
        0,
46
7.97M
    };
47
7.97M
}
48
49
char Lexer::consume()
50
154M
{
51
154M
    auto ch = m_lexer.consume();
52
154M
    if (ch == '\n') {
53
4.10M
        m_state.position.end_line.line_number++;
54
4.10M
        m_state.position.end_line.line_column = 0;
55
4.10M
    }
56
57
154M
    m_state.position.end_offset++;
58
154M
    return ch;
59
154M
}
60
61
void Lexer::reconsume(StringView string)
62
2.64k
{
63
30.2M
    for (auto byte : string.bytes()) {
64
30.2M
        if (byte == '\n') {
65
1.56M
            m_state.position.end_line.line_number++;
66
1.56M
            m_state.position.end_line.line_column = 0;
67
1.56M
        }
68
69
30.2M
        m_state.position.end_offset++;
70
30.2M
    }
71
2.64k
}
72
73
bool Lexer::consume_specific(char ch)
74
443M
{
75
443M
    if (m_lexer.peek() == ch) {
76
9.98M
        consume();
77
9.98M
        return true;
78
9.98M
    }
79
433M
    return false;
80
443M
}
81
82
ErrorOr<Lexer::ReductionResult> Lexer::reduce(Reduction reduction)
83
168M
{
84
168M
    switch (reduction) {
85
0
    case Reduction::None:
86
0
        return ReductionResult { {}, Reduction::None };
87
626k
    case Reduction::End:
88
626k
        return reduce_end();
89
10.2M
    case Reduction::Operator:
90
10.2M
        return reduce_operator();
91
7.36M
    case Reduction::Comment:
92
7.36M
        return reduce_comment();
93
12.2M
    case Reduction::SingleQuotedString:
94
12.2M
        return reduce_single_quoted_string();
95
27.7M
    case Reduction::DoubleQuotedString:
96
27.7M
        return reduce_double_quoted_string();
97
4.49M
    case Reduction::Expansion:
98
4.49M
        return reduce_expansion();
99
13.8M
    case Reduction::CommandExpansion:
100
13.8M
        return reduce_command_expansion();
101
77.4M
    case Reduction::Start:
102
77.4M
        return reduce_start();
103
1.64M
    case Reduction::ArithmeticExpansion:
104
1.64M
        return reduce_arithmetic_expansion();
105
557k
    case Reduction::SpecialParameterExpansion:
106
557k
        return reduce_special_parameter_expansion();
107
3.11M
    case Reduction::ParameterExpansion:
108
3.11M
        return reduce_parameter_expansion();
109
240k
    case Reduction::CommandOrArithmeticSubstitutionExpansion:
110
240k
        return reduce_command_or_arithmetic_substitution_expansion();
111
6.33M
    case Reduction::ExtendedParameterExpansion:
112
6.33M
        return reduce_extended_parameter_expansion();
113
2.76M
    case Reduction::HeredocContents:
114
2.76M
        return reduce_heredoc_contents();
115
168M
    }
116
117
0
    VERIFY_NOT_REACHED();
118
0
}
119
120
ErrorOr<Lexer::ReductionResult> Lexer::reduce_end()
121
626k
{
122
626k
    return ReductionResult {
123
626k
        .tokens = { Token::eof() },
124
626k
        .next_reduction = Reduction::None,
125
626k
    };
126
626k
}
127
128
Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
129
1.55M
{
130
1.55M
    StringBuilder builder;
131
1.55M
    enum ParseState {
132
1.55M
        Free,
133
1.55M
        InDoubleQuotes,
134
1.55M
        InSingleQuotes,
135
1.55M
    };
136
1.55M
    Vector<ParseState, 4> parse_state;
137
1.55M
    parse_state.append(Free);
138
1.55M
    bool escaped = false;
139
1.55M
    bool had_a_single_quote_segment = false;
140
141
20.2M
    for (auto byte : token.value.bytes()) {
142
20.2M
        switch (parse_state.last()) {
143
10.7M
        case Free:
144
10.7M
            switch (byte) {
145
14.0k
            case '"':
146
14.0k
                if (escaped) {
147
0
                    builder.append(byte);
148
0
                    escaped = false;
149
14.0k
                } else {
150
14.0k
                    parse_state.append(InDoubleQuotes);
151
14.0k
                }
152
14.0k
                break;
153
1.10k
            case '\'':
154
1.10k
                if (escaped) {
155
0
                    builder.append(byte);
156
0
                    escaped = false;
157
1.10k
                } else {
158
1.10k
                    had_a_single_quote_segment = true;
159
1.10k
                    parse_state.append(InSingleQuotes);
160
1.10k
                }
161
1.10k
                break;
162
0
            case '\\':
163
0
                if (escaped) {
164
0
                    builder.append(byte);
165
0
                    escaped = false;
166
0
                } else {
167
0
                    escaped = true;
168
0
                }
169
0
                break;
170
10.7M
            default:
171
                // NOTE: bash eats the backslash outside quotes :shrug:
172
10.7M
                if (escaped && parse_state.last() != Free) {
173
0
                    builder.append('\\');
174
0
                    escaped = false;
175
0
                }
176
10.7M
                builder.append(byte);
177
10.7M
                break;
178
10.7M
            }
179
10.7M
            break;
180
10.7M
        case InDoubleQuotes:
181
7.96M
            if (!escaped && byte == '"') {
182
13.9k
                parse_state.take_last();
183
13.9k
                break;
184
13.9k
            }
185
7.94M
            if (escaped) {
186
3.40M
                if (byte != '"')
187
3.40M
                    builder.append('\\');
188
3.40M
                builder.append(byte);
189
3.40M
                break;
190
3.40M
            }
191
4.54M
            if (byte == '\\')
192
38
                escaped = true;
193
4.54M
            else
194
4.54M
                builder.append(byte);
195
4.54M
            break;
196
1.57M
        case InSingleQuotes:
197
1.57M
            if (byte == '\'') {
198
1.08k
                parse_state.take_last();
199
1.08k
                break;
200
1.08k
            }
201
1.57M
            builder.append(byte);
202
1.57M
            break;
203
20.2M
        }
204
20.2M
    }
205
206
    // NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
207
208
1.55M
    return {
209
1.55M
        .key = builder.to_string().release_value_but_fixme_should_propagate_errors(),
210
1.55M
        .allow_interpolation = !had_a_single_quote_segment,
211
1.55M
    };
212
1.55M
}
213
214
ErrorOr<Lexer::ReductionResult> Lexer::reduce_operator()
215
10.2M
{
216
10.2M
    if (m_lexer.is_eof()) {
217
4.39k
        if (is_operator(m_state.buffer.string_view())) {
218
4.39k
            auto tokens = TRY(Token::operators_from(m_state));
219
4.39k
            m_state.buffer.clear();
220
4.39k
            m_state.position.start_offset = m_state.position.end_offset;
221
4.39k
            m_state.position.start_line = m_state.position.end_line;
222
223
4.39k
            return ReductionResult {
224
4.39k
                .tokens = move(tokens),
225
4.39k
                .next_reduction = Reduction::End,
226
4.39k
            };
227
4.39k
        }
228
229
0
        return reduce(Reduction::Start);
230
4.39k
    }
231
232
10.2M
    if (is_part_of_operator(m_state.buffer.string_view(), m_lexer.peek())) {
233
1.93M
        m_state.buffer.append(consume());
234
1.93M
        return ReductionResult {
235
1.93M
            .tokens = {},
236
1.93M
            .next_reduction = Reduction::Operator,
237
1.93M
        };
238
1.93M
    }
239
240
8.32M
    auto tokens = Vector<Token> {};
241
8.32M
    if (is_operator(m_state.buffer.string_view())) {
242
8.32M
        tokens.extend(TRY(Token::operators_from(m_state)));
243
8.32M
        m_state.buffer.clear();
244
8.32M
        m_state.position.start_offset = m_state.position.end_offset;
245
8.32M
        m_state.position.start_line = m_state.position.end_line;
246
8.32M
    }
247
248
8.32M
    auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
249
250
8.32M
    auto result = TRY(reduce(Reduction::Start));
251
8.32M
    tokens.extend(move(result.tokens));
252
253
24.9M
    while (expect_heredoc_entry && tokens.size() == 1 && result.next_reduction != Reduction::None) {
254
16.6M
        result = TRY(reduce(result.next_reduction));
255
16.6M
        tokens.extend(move(result.tokens));
256
16.6M
    }
257
258
8.32M
    if (expect_heredoc_entry && tokens.size() > 1) {
259
1.26M
        auto [key, interpolation] = process_heredoc_key(tokens[1]);
260
1.26M
        m_state.heredoc_entries.append(HeredocEntry {
261
1.26M
            .key = key,
262
1.26M
            .allow_interpolation = interpolation,
263
1.26M
            .dedent = tokens[0].type == Token::Type::DoubleLessDash,
264
1.26M
        });
265
1.26M
    }
266
267
8.32M
    return ReductionResult {
268
8.32M
        .tokens = move(tokens),
269
8.32M
        .next_reduction = result.next_reduction,
270
8.32M
    };
271
8.32M
}
272
273
ErrorOr<Lexer::ReductionResult> Lexer::reduce_comment()
274
7.36M
{
275
7.36M
    if (m_lexer.is_eof()) {
276
772
        return ReductionResult {
277
772
            .tokens = {},
278
772
            .next_reduction = Reduction::End,
279
772
        };
280
772
    }
281
282
7.36M
    if (consume() == '\n') {
283
245k
        m_state.on_new_line = true;
284
245k
        return ReductionResult {
285
245k
            .tokens = { Token::newline() },
286
245k
            .next_reduction = Reduction::Start,
287
245k
        };
288
245k
    }
289
290
7.11M
    return ReductionResult {
291
7.11M
        .tokens = {},
292
7.11M
        .next_reduction = Reduction::Comment,
293
7.11M
    };
294
7.36M
}
295
296
ErrorOr<Lexer::ReductionResult> Lexer::reduce_single_quoted_string()
297
12.2M
{
298
12.2M
    if (m_lexer.is_eof()) {
299
466
        auto tokens = TRY(Token::maybe_from_state(m_state));
300
458
        tokens.append(Token::continuation('\''));
301
458
        return ReductionResult {
302
458
            .tokens = move(tokens),
303
458
            .next_reduction = Reduction::End,
304
458
        };
305
466
    }
306
307
12.2M
    auto ch = consume();
308
12.2M
    m_state.buffer.append(ch);
309
310
12.2M
    if (ch == '\'') {
311
271k
        return ReductionResult {
312
271k
            .tokens = {},
313
271k
            .next_reduction = Reduction::Start,
314
271k
        };
315
271k
    }
316
317
11.9M
    return ReductionResult {
318
11.9M
        .tokens = {},
319
11.9M
        .next_reduction = Reduction::SingleQuotedString,
320
11.9M
    };
321
12.2M
}
322
323
ErrorOr<Lexer::ReductionResult> Lexer::reduce_double_quoted_string()
324
27.7M
{
325
27.7M
    m_state.previous_reduction = Reduction::DoubleQuotedString;
326
27.7M
    if (m_lexer.is_eof()) {
327
12.6k
        auto tokens = TRY(Token::maybe_from_state(m_state));
328
12.6k
        tokens.append(Token::continuation('"'));
329
12.6k
        return ReductionResult {
330
12.6k
            .tokens = move(tokens),
331
12.6k
            .next_reduction = Reduction::End,
332
12.6k
        };
333
12.6k
    }
334
335
27.7M
    auto ch = consume();
336
27.7M
    m_state.buffer.append(ch);
337
338
27.7M
    if (m_state.escaping) {
339
84.5k
        m_state.escaping = false;
340
341
84.5k
        return ReductionResult {
342
84.5k
            .tokens = {},
343
84.5k
            .next_reduction = Reduction::DoubleQuotedString,
344
84.5k
        };
345
84.5k
    }
346
347
27.6M
    switch (ch) {
348
84.5k
    case '\\':
349
84.5k
        m_state.escaping = true;
350
84.5k
        return ReductionResult {
351
84.5k
            .tokens = {},
352
84.5k
            .next_reduction = Reduction::DoubleQuotedString,
353
84.5k
        };
354
209k
    case '"':
355
209k
        m_state.previous_reduction = Reduction::Start;
356
209k
        return ReductionResult {
357
209k
            .tokens = {},
358
209k
            .next_reduction = Reduction::Start,
359
209k
        };
360
403k
    case '$':
361
403k
        if (m_lexer.next_is("("))
362
115k
            m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
363
288k
        else
364
288k
            m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
365
403k
        return ReductionResult {
366
403k
            .tokens = {},
367
403k
            .next_reduction = Reduction::Expansion,
368
403k
        };
369
30.8k
    case '`':
370
30.8k
        m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
371
30.8k
        return ReductionResult {
372
30.8k
            .tokens = {},
373
30.8k
            .next_reduction = Reduction::CommandExpansion,
374
30.8k
        };
375
26.9M
    default:
376
26.9M
        return ReductionResult {
377
26.9M
            .tokens = {},
378
26.9M
            .next_reduction = Reduction::DoubleQuotedString,
379
26.9M
        };
380
27.6M
    }
381
27.6M
}
382
383
ErrorOr<Lexer::ReductionResult> Lexer::reduce_expansion()
384
4.49M
{
385
4.49M
    if (m_lexer.is_eof())
386
113k
        return reduce(m_state.previous_reduction);
387
388
4.38M
    auto ch = m_lexer.peek();
389
390
4.38M
    switch (ch) {
391
13.4k
    case '{':
392
13.4k
        consume();
393
13.4k
        m_state.buffer.append(ch);
394
13.4k
        return ReductionResult {
395
13.4k
            .tokens = {},
396
13.4k
            .next_reduction = Reduction::ExtendedParameterExpansion,
397
13.4k
        };
398
208k
    case '(':
399
208k
        consume();
400
208k
        m_state.buffer.append(ch);
401
208k
        return ReductionResult {
402
208k
            .tokens = {},
403
208k
            .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
404
208k
        };
405
41.0k
    case 'a' ... 'z':
406
113k
    case 'A' ... 'Z':
407
159k
    case '_': {
408
159k
        consume();
409
159k
        m_state.buffer.append(ch);
410
159k
        auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
411
159k
        expansion.parameter.append(ch);
412
159k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
413
414
159k
        return ReductionResult {
415
159k
            .tokens = {},
416
159k
            .next_reduction = Reduction::ParameterExpansion,
417
159k
        };
418
113k
    }
419
859
    case '0' ... '9':
420
17.9k
    case '-':
421
26.2k
    case '!':
422
31.6k
    case '@':
423
32.5k
    case '#':
424
39.4k
    case '?':
425
40.0k
    case '*':
426
557k
    case '$':
427
557k
        return reduce(Reduction::SpecialParameterExpansion);
428
3.44M
    default:
429
3.44M
        m_state.buffer.append(ch);
430
3.44M
        return reduce(m_state.previous_reduction);
431
4.38M
    }
432
4.38M
}
433
434
ErrorOr<Lexer::ReductionResult> Lexer::reduce_command_expansion()
435
13.8M
{
436
13.8M
    if (m_lexer.is_eof()) {
437
144
        auto& expansion = m_state.expansions.last().get<CommandExpansion>();
438
144
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
439
440
144
        return ReductionResult {
441
144
            .tokens = { Token::continuation('`') },
442
144
            .next_reduction = m_state.previous_reduction,
443
144
        };
444
144
    }
445
446
13.8M
    auto ch = consume();
447
448
13.8M
    if (!m_state.escaping && ch == '`') {
449
2.81M
        m_state.buffer.append(ch);
450
2.81M
        auto& expansion = m_state.expansions.last().get<CommandExpansion>();
451
2.81M
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
452
453
2.81M
        return ReductionResult {
454
2.81M
            .tokens = {},
455
2.81M
            .next_reduction = m_state.previous_reduction,
456
2.81M
        };
457
2.81M
    }
458
459
11.0M
    if (!m_state.escaping && ch == '\\') {
460
45.0k
        m_state.escaping = true;
461
45.0k
        return ReductionResult {
462
45.0k
            .tokens = {},
463
45.0k
            .next_reduction = Reduction::CommandExpansion,
464
45.0k
        };
465
45.0k
    }
466
467
10.9M
    m_state.escaping = false;
468
10.9M
    m_state.buffer.append(ch);
469
10.9M
    m_state.expansions.last().get<CommandExpansion>().command.append(ch);
470
10.9M
    return ReductionResult {
471
10.9M
        .tokens = {},
472
10.9M
        .next_reduction = Reduction::CommandExpansion,
473
10.9M
    };
474
11.0M
}
475
476
ErrorOr<Lexer::ReductionResult> Lexer::reduce_heredoc_contents()
477
2.76M
{
478
2.76M
    if (m_lexer.is_eof()) {
479
3.59k
        auto tokens = TRY(Token::maybe_from_state(m_state));
480
3.59k
        m_state.buffer.clear();
481
3.59k
        m_state.position.start_offset = m_state.position.end_offset;
482
3.59k
        m_state.position.start_line = m_state.position.end_line;
483
484
3.59k
        return ReductionResult {
485
3.59k
            .tokens = move(tokens),
486
3.59k
            .next_reduction = Reduction::End,
487
3.59k
        };
488
3.59k
    }
489
490
2.75M
    if (!m_state.escaping && consume_specific('\\')) {
491
315
        m_state.escaping = true;
492
315
        m_state.buffer.append('\\');
493
315
        return ReductionResult {
494
315
            .tokens = {},
495
315
            .next_reduction = Reduction::HeredocContents,
496
315
        };
497
315
    }
498
499
2.75M
    if (!m_state.escaping && consume_specific('$')) {
500
8.57k
        m_state.buffer.append('$');
501
8.57k
        if (m_lexer.next_is("("))
502
2.87k
            m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
503
5.70k
        else
504
5.70k
            m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
505
506
8.57k
        return ReductionResult {
507
8.57k
            .tokens = {},
508
8.57k
            .next_reduction = Reduction::Expansion,
509
8.57k
        };
510
8.57k
    }
511
512
2.74M
    if (!m_state.escaping && consume_specific('`')) {
513
4.26k
        m_state.buffer.append('`');
514
4.26k
        m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
515
4.26k
        return ReductionResult {
516
4.26k
            .tokens = {},
517
4.26k
            .next_reduction = Reduction::CommandExpansion,
518
4.26k
        };
519
4.26k
    }
520
521
2.74M
    m_state.escaping = false;
522
2.74M
    m_state.buffer.append(consume());
523
2.74M
    return ReductionResult {
524
2.74M
        .tokens = {},
525
2.74M
        .next_reduction = Reduction::HeredocContents,
526
2.74M
    };
527
2.74M
}
528
529
ErrorOr<Lexer::ReductionResult> Lexer::reduce_start()
530
77.4M
{
531
77.4M
    auto was_on_new_line = m_state.on_new_line;
532
77.4M
    m_state.on_new_line = false;
533
534
77.4M
    if (m_lexer.is_eof()) {
535
606k
        auto tokens = TRY(Token::maybe_from_state(m_state));
536
606k
        m_state.buffer.clear();
537
606k
        m_state.expansions.clear();
538
606k
        m_state.position.start_offset = m_state.position.end_offset;
539
606k
        m_state.position.start_line = m_state.position.end_line;
540
541
606k
        return ReductionResult {
542
606k
            .tokens = move(tokens),
543
606k
            .next_reduction = Reduction::End,
544
606k
        };
545
606k
    }
546
547
76.8M
    if (was_on_new_line && !m_state.heredoc_entries.is_empty()) {
548
2.49k
        auto const& entry = m_state.heredoc_entries.first();
549
550
2.49k
        auto start_index = m_lexer.tell();
551
2.49k
        Optional<size_t> end_index;
552
553
30.2M
        for (; !m_lexer.is_eof();) {
554
30.2M
            auto index = m_lexer.tell();
555
30.2M
            auto possible_end_index = m_lexer.tell();
556
30.2M
            if (m_lexer.consume_specific('\n')) {
557
1.56M
                if (entry.dedent)
558
117k
                    m_lexer.ignore_while(is_any_of("\t"sv));
559
1.56M
                if (m_lexer.consume_specific(entry.key.bytes_as_string_view())) {
560
26.5k
                    if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
561
150
                        end_index = possible_end_index;
562
150
                        break;
563
150
                    }
564
26.5k
                }
565
1.56M
            }
566
30.2M
            if (m_lexer.tell() == index)
567
28.6M
                m_lexer.ignore();
568
30.2M
        }
569
570
2.49k
        auto contents = m_lexer.input().substring_view(start_index, end_index.value_or(m_lexer.tell()) - start_index);
571
2.49k
        reconsume(contents);
572
2.49k
        if (end_index.has_value())
573
150
            reconsume(m_lexer.input().substring_view_starting_after_substring(contents).substring_view(0, m_lexer.tell() - *end_index));
574
575
2.49k
        m_state.buffer.clear();
576
2.49k
        m_state.buffer.append(contents);
577
578
2.49k
        auto token = TRY(Token::maybe_from_state(m_state)).first();
579
2.47k
        token.relevant_heredoc_key = entry.key;
580
2.47k
        token.type = Token::Type::HeredocContents;
581
582
2.47k
        m_state.heredoc_entries.take_first();
583
584
2.47k
        m_state.on_new_line = true;
585
586
2.47k
        m_state.buffer.clear();
587
2.47k
        m_state.position.start_offset = m_state.position.end_offset;
588
2.47k
        m_state.position.start_line = m_state.position.end_line;
589
590
2.47k
        Vector<Token> tokens { move(token), Token::newline() };
591
592
2.47k
        return ReductionResult {
593
2.47k
            .tokens = move(tokens),
594
2.47k
            .next_reduction = Reduction::Start,
595
2.47k
        };
596
2.49k
    }
597
598
76.8M
    if (m_state.escaping && consume_specific('\n')) {
599
225
        m_state.escaping = false;
600
601
225
        auto buffer = m_state.buffer.to_byte_string().substring(0, m_state.buffer.length() - 1);
602
225
        m_state.buffer.clear();
603
225
        m_state.buffer.append(buffer);
604
605
225
        return ReductionResult {
606
225
            .tokens = {},
607
225
            .next_reduction = Reduction::Start,
608
225
        };
609
225
    }
610
611
76.8M
    if (!m_state.escaping && m_lexer.peek() == '#' && m_state.buffer.is_empty()) {
612
246k
        consume();
613
246k
        return ReductionResult {
614
246k
            .tokens = {},
615
246k
            .next_reduction = Reduction::Comment,
616
246k
        };
617
246k
    }
618
619
76.5M
    if (!m_state.escaping && consume_specific('\n')) {
620
2.59M
        auto tokens = TRY(Token::maybe_from_state(m_state));
621
2.59M
        tokens.append(Token::newline());
622
623
2.59M
        m_state.on_new_line = true;
624
625
2.59M
        m_state.buffer.clear();
626
2.59M
        m_state.expansions.clear();
627
2.59M
        m_state.position.start_offset = m_state.position.end_offset;
628
2.59M
        m_state.position.start_line = m_state.position.end_line;
629
630
2.59M
        return ReductionResult {
631
2.59M
            .tokens = move(tokens),
632
2.59M
            .next_reduction = Reduction::Start,
633
2.59M
        };
634
2.59M
    }
635
636
73.9M
    if (!m_state.escaping && consume_specific('\\')) {
637
25.8k
        m_state.escaping = true;
638
25.8k
        m_state.buffer.append('\\');
639
25.8k
        return ReductionResult {
640
25.8k
            .tokens = {},
641
25.8k
            .next_reduction = Reduction::Start,
642
25.8k
        };
643
25.8k
    }
644
645
73.9M
    if (!m_state.escaping && consume_specific('\'')) {
646
271k
        m_state.buffer.append('\'');
647
271k
        return ReductionResult {
648
271k
            .tokens = {},
649
271k
            .next_reduction = Reduction::SingleQuotedString,
650
271k
        };
651
271k
    }
652
653
73.6M
    if (!m_state.escaping && consume_specific('"')) {
654
209k
        m_state.buffer.append('"');
655
209k
        return ReductionResult {
656
209k
            .tokens = {},
657
209k
            .next_reduction = Reduction::DoubleQuotedString,
658
209k
        };
659
209k
    }
660
661
73.4M
    if (!m_state.escaping && is_ascii_space(m_lexer.peek())) {
662
2.59M
        consume();
663
2.59M
        auto tokens = TRY(Token::maybe_from_state(m_state));
664
2.59M
        m_state.buffer.clear();
665
2.59M
        m_state.expansions.clear();
666
2.59M
        m_state.position.start_offset = m_state.position.end_offset;
667
2.59M
        m_state.position.start_line = m_state.position.end_line;
668
669
2.59M
        return ReductionResult {
670
2.59M
            .tokens = move(tokens),
671
2.59M
            .next_reduction = Reduction::Start,
672
2.59M
        };
673
2.59M
    }
674
675
70.8M
    if (!m_state.escaping && consume_specific('$')) {
676
4.08M
        m_state.buffer.append('$');
677
4.08M
        if (m_lexer.next_is("("))
678
91.0k
            m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
679
3.99M
        else
680
3.99M
            m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
681
682
4.08M
        return ReductionResult {
683
4.08M
            .tokens = {},
684
4.08M
            .next_reduction = Reduction::Expansion,
685
4.08M
        };
686
4.08M
    }
687
688
66.7M
    if (!m_state.escaping && consume_specific('`')) {
689
2.78M
        m_state.buffer.append('`');
690
2.78M
        m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
691
2.78M
        return ReductionResult {
692
2.78M
            .tokens = {},
693
2.78M
            .next_reduction = Reduction::CommandExpansion,
694
2.78M
        };
695
2.78M
    }
696
697
63.9M
    if (!m_state.escaping && m_state.in_skip_mode && is_any_of("})"sv)(m_lexer.peek())) {
698
        // That's an eof for us.
699
116k
        return ReductionResult {
700
116k
            .tokens = {},
701
116k
            .next_reduction = Reduction::None,
702
116k
        };
703
116k
    }
704
705
63.8M
    if (!m_state.escaping && is_part_of_operator(""sv, m_lexer.peek())) {
706
8.34M
        auto tokens = TRY(Token::maybe_from_state(m_state));
707
8.34M
        m_state.buffer.clear();
708
8.34M
        m_state.buffer.append(consume());
709
8.34M
        m_state.expansions.clear();
710
8.34M
        m_state.position.start_offset = m_state.position.end_offset;
711
8.34M
        m_state.position.start_line = m_state.position.end_line;
712
713
8.34M
        return ReductionResult {
714
8.34M
            .tokens = move(tokens),
715
8.34M
            .next_reduction = Reduction::Operator,
716
8.34M
        };
717
8.34M
    }
718
719
55.5M
    m_state.escaping = false;
720
55.5M
    m_state.buffer.append(consume());
721
55.5M
    return ReductionResult {
722
55.5M
        .tokens = {},
723
55.5M
        .next_reduction = Reduction::Start,
724
55.5M
    };
725
63.8M
}
726
727
ErrorOr<Lexer::ReductionResult> Lexer::reduce_arithmetic_expansion()
728
1.64M
{
729
1.64M
    if (m_lexer.is_eof()) {
730
12
        auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
731
12
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
732
733
12
        return ReductionResult {
734
12
            .tokens = { Token::continuation("$(("_string) },
735
12
            .next_reduction = m_state.previous_reduction,
736
12
        };
737
12
    }
738
739
1.64M
    if (m_lexer.peek() == ')' && m_state.buffer.string_view().ends_with(')')) {
740
106k
        m_state.buffer.append(consume());
741
106k
        auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
742
106k
        expansion.expression = TRY(String::from_utf8(expansion.value.string_view().substring_view(0, expansion.value.length() - 1)));
743
106k
        expansion.value.clear();
744
106k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
745
746
106k
        return ReductionResult {
747
106k
            .tokens = {},
748
106k
            .next_reduction = m_state.previous_reduction,
749
106k
        };
750
106k
    }
751
752
1.53M
    auto ch = consume();
753
1.53M
    m_state.buffer.append(ch);
754
1.53M
    m_state.expansions.last().get<ArithmeticExpansion>().value.append(ch);
755
1.53M
    return ReductionResult {
756
1.53M
        .tokens = {},
757
1.53M
        .next_reduction = Reduction::ArithmeticExpansion,
758
1.53M
    };
759
1.64M
}
760
761
ErrorOr<Lexer::ReductionResult> Lexer::reduce_special_parameter_expansion()
762
557k
{
763
557k
    auto ch = consume();
764
557k
    m_state.buffer.append(ch);
765
557k
    m_state.expansions.last() = ParameterExpansion {
766
557k
        .parameter = StringBuilder {},
767
557k
        .range = range(-2),
768
557k
    };
769
557k
    auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
770
557k
    expansion.parameter.append(ch);
771
557k
    expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
772
773
557k
    return ReductionResult {
774
557k
        .tokens = {},
775
557k
        .next_reduction = m_state.previous_reduction,
776
557k
    };
777
557k
}
778
779
ErrorOr<Lexer::ReductionResult> Lexer::reduce_parameter_expansion()
780
3.11M
{
781
3.11M
    auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
782
783
3.11M
    if (m_lexer.is_eof()) {
784
1.30k
        return ReductionResult {
785
1.30k
            .tokens = {},
786
1.30k
            .next_reduction = Reduction::Start,
787
1.30k
        };
788
1.30k
    }
789
790
3.11M
    auto next = m_lexer.peek();
791
3.11M
    if (is_ascii_alphanumeric(next) || next == '_') {
792
2.95M
        m_state.buffer.append(consume());
793
2.95M
        expansion.parameter.append(next);
794
2.95M
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
795
796
2.95M
        return ReductionResult {
797
2.95M
            .tokens = {},
798
2.95M
            .next_reduction = Reduction::ParameterExpansion,
799
2.95M
        };
800
2.95M
    }
801
802
158k
    return reduce(m_state.previous_reduction);
803
3.11M
}
804
805
ErrorOr<Lexer::ReductionResult> Lexer::reduce_command_or_arithmetic_substitution_expansion()
806
240k
{
807
240k
    auto ch = m_lexer.peek();
808
240k
    if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) {
809
106k
        m_state.buffer.append(consume());
810
106k
        m_state.expansions.last() = ArithmeticExpansion {
811
106k
            .expression = {},
812
106k
            .value = StringBuilder {},
813
106k
            .range = range(-2)
814
106k
        };
815
106k
        return ReductionResult {
816
106k
            .tokens = {},
817
106k
            .next_reduction = Reduction::ArithmeticExpansion,
818
106k
        };
819
106k
    }
820
821
133k
    auto saved_position = m_state.position;
822
133k
    {
823
133k
        auto skip_mode = switch_to_skip_mode();
824
825
133k
        auto next_reduction = Reduction::Start;
826
26.3M
        do {
827
26.3M
            auto result = TRY(reduce(next_reduction));
828
26.3M
            next_reduction = result.next_reduction;
829
26.3M
        } while (next_reduction != Reduction::None);
830
133k
        saved_position = m_state.position;
831
132k
    }
832
833
132k
    auto const skipped_text = m_lexer.input().substring_view(m_state.position.end_offset, saved_position.end_offset - m_state.position.end_offset);
834
132k
    m_state.position.end_offset = saved_position.end_offset;
835
132k
    m_state.position.end_line = saved_position.end_line;
836
837
132k
    m_state.buffer.append(skipped_text);
838
132k
    m_state.expansions.last().get<CommandExpansion>().command.append(skipped_text);
839
132k
    m_state.expansions.last().visit([&](auto& expansion) {
840
132k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
841
132k
    });
Unexecuted instantiation: PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_0::operator()<Shell::Posix::ParameterExpansion>(Shell::Posix::ParameterExpansion&) const
PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_0::operator()<Shell::Posix::CommandExpansion>(Shell::Posix::CommandExpansion&) const
Line
Count
Source
839
132k
    m_state.expansions.last().visit([&](auto& expansion) {
840
132k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
841
132k
    });
Unexecuted instantiation: PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_0::operator()<Shell::Posix::ArithmeticExpansion>(Shell::Posix::ArithmeticExpansion&) const
842
843
132k
    if (m_lexer.is_eof()) {
844
16.0k
        return ReductionResult {
845
16.0k
            .tokens = { Token::continuation("$("_string) },
846
16.0k
            .next_reduction = m_state.previous_reduction,
847
16.0k
        };
848
16.0k
    }
849
850
116k
    ch = m_lexer.peek();
851
116k
    if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) {
852
0
        m_state.buffer.append(consume());
853
0
        m_state.expansions.last() = ArithmeticExpansion {
854
0
            .expression = {},
855
0
            .value = m_state.expansions.last().get<CommandExpansion>().command,
856
0
            .range = range(-2)
857
0
        };
858
0
        return ReductionResult {
859
0
            .tokens = {},
860
0
            .next_reduction = Reduction::ArithmeticExpansion,
861
0
        };
862
0
    }
863
864
116k
    if (ch == ')') {
865
85.6k
        m_state.buffer.append(consume());
866
85.6k
        m_state.expansions.last().visit([&](auto& expansion) {
867
85.6k
            expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
868
85.6k
        });
Unexecuted instantiation: PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_1::operator()<Shell::Posix::ParameterExpansion>(Shell::Posix::ParameterExpansion&) const
PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_1::operator()<Shell::Posix::CommandExpansion>(Shell::Posix::CommandExpansion&) const
Line
Count
Source
866
85.6k
        m_state.expansions.last().visit([&](auto& expansion) {
867
85.6k
            expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
868
85.6k
        });
Unexecuted instantiation: PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_1::operator()<Shell::Posix::ArithmeticExpansion>(Shell::Posix::ArithmeticExpansion&) const
869
85.6k
        return ReductionResult {
870
85.6k
            .tokens = {},
871
85.6k
            .next_reduction = m_state.previous_reduction,
872
85.6k
        };
873
85.6k
    }
874
875
31.2k
    m_state.buffer.append(consume());
876
31.2k
    m_state.expansions.last().get<CommandExpansion>().command.append(ch);
877
31.2k
    return ReductionResult {
878
31.2k
        .tokens = {},
879
31.2k
        .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
880
31.2k
    };
881
116k
}
882
883
ErrorOr<Lexer::ReductionResult> Lexer::reduce_extended_parameter_expansion()
884
6.33M
{
885
6.33M
    auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
886
887
6.33M
    if (m_lexer.is_eof()) {
888
248
        return ReductionResult {
889
248
            .tokens = { Token::continuation("${"_string) },
890
248
            .next_reduction = m_state.previous_reduction,
891
248
        };
892
248
    }
893
894
6.33M
    auto ch = m_lexer.peek();
895
6.33M
    if (ch == '}') {
896
13.1k
        m_state.buffer.append(consume());
897
13.1k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
898
899
13.1k
        return ReductionResult {
900
13.1k
            .tokens = {},
901
13.1k
            .next_reduction = m_state.previous_reduction,
902
13.1k
        };
903
13.1k
    }
904
905
6.31M
    m_state.buffer.append(consume());
906
6.31M
    expansion.parameter.append(ch);
907
6.31M
    expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
908
909
6.31M
    return ReductionResult {
910
6.31M
        .tokens = {},
911
6.31M
        .next_reduction = Reduction::ExtendedParameterExpansion,
912
6.31M
    };
913
6.33M
}
914
915
StringView Token::type_name() const
916
2.83M
{
917
2.83M
    switch (type) {
918
21.5k
    case Type::Eof:
919
21.5k
        return "Eof"sv;
920
188k
    case Type::Newline:
921
188k
        return "Newline"sv;
922
237
    case Type::Continuation:
923
237
        return "Continuation"sv;
924
0
    case Type::Token:
925
0
        return "Token"sv;
926
359k
    case Type::And:
927
359k
        return "And"sv;
928
972k
    case Type::Pipe:
929
972k
        return "Pipe"sv;
930
29.7k
    case Type::OpenParen:
931
29.7k
        return "OpenParen"sv;
932
851k
    case Type::CloseParen:
933
851k
        return "CloseParen"sv;
934
380
    case Type::Great:
935
380
        return "Great"sv;
936
39
    case Type::Less:
937
39
        return "Less"sv;
938
1.28k
    case Type::AndIf:
939
1.28k
        return "AndIf"sv;
940
177k
    case Type::OrIf:
941
177k
        return "OrIf"sv;
942
145
    case Type::DoubleSemicolon:
943
145
        return "DoubleSemicolon"sv;
944
42
    case Type::DoubleLess:
945
42
        return "DoubleLess"sv;
946
2
    case Type::DoubleGreat:
947
2
        return "DoubleGreat"sv;
948
48.7k
    case Type::LessAnd:
949
48.7k
        return "LessAnd"sv;
950
17.9k
    case Type::GreatAnd:
951
17.9k
        return "GreatAnd"sv;
952
6
    case Type::LessGreat:
953
6
        return "LessGreat"sv;
954
0
    case Type::DoubleLessDash:
955
0
        return "DoubleLessDash"sv;
956
1
    case Type::Clobber:
957
1
        return "Clobber"sv;
958
88.7k
    case Type::Semicolon:
959
88.7k
        return "Semicolon"sv;
960
0
    case Type::HeredocContents:
961
0
        return "HeredocContents"sv;
962
615
    case Type::AssignmentWord:
963
615
        return "AssignmentWord"sv;
964
66.5k
    case Type::ListAssignmentWord:
965
66.5k
        return "ListAssignmentWord"sv;
966
0
    case Type::Bang:
967
0
        return "Bang"sv;
968
857
    case Type::Case:
969
857
        return "Case"sv;
970
119
    case Type::CloseBrace:
971
119
        return "CloseBrace"sv;
972
343
    case Type::Do:
973
343
        return "Do"sv;
974
444
    case Type::Done:
975
444
        return "Done"sv;
976
278
    case Type::Elif:
977
278
        return "Elif"sv;
978
0
    case Type::Else:
979
0
        return "Else"sv;
980
0
    case Type::Esac:
981
0
        return "Esac"sv;
982
161
    case Type::Fi:
983
161
        return "Fi"sv;
984
331
    case Type::For:
985
331
        return "For"sv;
986
590
    case Type::If:
987
590
        return "If"sv;
988
1.58k
    case Type::In:
989
1.58k
        return "In"sv;
990
504
    case Type::IoNumber:
991
504
        return "IoNumber"sv;
992
633
    case Type::OpenBrace:
993
633
        return "OpenBrace"sv;
994
0
    case Type::Then:
995
0
        return "Then"sv;
996
0
    case Type::Until:
997
0
        return "Until"sv;
998
1.45k
    case Type::VariableName:
999
1.45k
        return "VariableName"sv;
1000
0
    case Type::While:
1001
0
        return "While"sv;
1002
805
    case Type::Word:
1003
805
        return "Word"sv;
1004
2.83M
    }
1005
0
    return "Idk"sv;
1006
2.83M
}
1007
1008
}