Coverage Report

Created: 2026-02-16 07:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibShell/PosixLexer.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/CharacterTypes.h>
8
#include <LibShell/PosixLexer.h>
9
10
static bool is_operator(StringView text)
11
3.75M
{
12
3.75M
    return Shell::Posix::Token::operator_from_name(text).has_value();
13
3.75M
}
14
15
static bool is_part_of_operator(StringView text, char ch)
16
52.8M
{
17
52.8M
    StringBuilder builder;
18
52.8M
    builder.append(text);
19
52.8M
    builder.append(ch);
20
21
52.8M
    return Shell::Posix::Token::operator_from_name(builder.string_view()).has_value();
22
52.8M
}
23
24
namespace Shell::Posix {
25
26
ErrorOr<Vector<Token>> Lexer::batch_next(Optional<Reduction> starting_reduction)
27
7.25M
{
28
7.25M
    if (starting_reduction.has_value())
29
16.2k
        m_next_reduction = *starting_reduction;
30
31
97.4M
    for (; m_next_reduction != Reduction::None;) {
32
97.4M
        auto result = TRY(reduce(m_next_reduction));
33
97.4M
        m_next_reduction = result.next_reduction;
34
97.4M
        if (!result.tokens.is_empty())
35
7.25M
            return result.tokens;
36
97.4M
    }
37
38
0
    return Vector<Token> {};
39
7.25M
}
40
41
ExpansionRange Lexer::range(ssize_t offset) const
42
9.06M
{
43
9.06M
    return {
44
9.06M
        m_state.position.end_offset - m_state.position.start_offset + offset,
45
9.06M
        0,
46
9.06M
    };
47
9.06M
}
48
49
char Lexer::consume()
50
128M
{
51
128M
    auto ch = m_lexer.consume();
52
128M
    if (ch == '\n') {
53
2.53M
        m_state.position.end_line.line_number++;
54
2.53M
        m_state.position.end_line.line_column = 0;
55
2.53M
    }
56
57
128M
    m_state.position.end_offset++;
58
128M
    return ch;
59
128M
}
60
61
void Lexer::reconsume(StringView string)
62
2.46k
{
63
22.4M
    for (auto byte : string.bytes()) {
64
22.4M
        if (byte == '\n') {
65
1.52M
            m_state.position.end_line.line_number++;
66
1.52M
            m_state.position.end_line.line_column = 0;
67
1.52M
        }
68
69
22.4M
        m_state.position.end_offset++;
70
22.4M
    }
71
2.46k
}
72
73
bool Lexer::consume_specific(char ch)
74
344M
{
75
344M
    if (m_lexer.peek() == ch) {
76
8.21M
        consume();
77
8.21M
        return true;
78
8.21M
    }
79
336M
    return false;
80
344M
}
81
82
ErrorOr<Lexer::ReductionResult> Lexer::reduce(Reduction reduction)
83
137M
{
84
137M
    switch (reduction) {
85
0
    case Reduction::None:
86
0
        return ReductionResult { {}, Reduction::None };
87
637k
    case Reduction::End:
88
637k
        return reduce_end();
89
4.47M
    case Reduction::Operator:
90
4.47M
        return reduce_operator();
91
6.07M
    case Reduction::Comment:
92
6.07M
        return reduce_comment();
93
10.4M
    case Reduction::SingleQuotedString:
94
10.4M
        return reduce_single_quoted_string();
95
22.6M
    case Reduction::DoubleQuotedString:
96
22.6M
        return reduce_double_quoted_string();
97
4.83M
    case Reduction::Expansion:
98
4.83M
        return reduce_expansion();
99
14.1M
    case Reduction::CommandExpansion:
100
14.1M
        return reduce_command_expansion();
101
59.3M
    case Reduction::Start:
102
59.3M
        return reduce_start();
103
1.96M
    case Reduction::ArithmeticExpansion:
104
1.96M
        return reduce_arithmetic_expansion();
105
730k
    case Reduction::SpecialParameterExpansion:
106
730k
        return reduce_special_parameter_expansion();
107
2.39M
    case Reduction::ParameterExpansion:
108
2.39M
        return reduce_parameter_expansion();
109
197k
    case Reduction::CommandOrArithmeticSubstitutionExpansion:
110
197k
        return reduce_command_or_arithmetic_substitution_expansion();
111
7.45M
    case Reduction::ExtendedParameterExpansion:
112
7.45M
        return reduce_extended_parameter_expansion();
113
2.54M
    case Reduction::HeredocContents:
114
2.54M
        return reduce_heredoc_contents();
115
137M
    }
116
117
0
    VERIFY_NOT_REACHED();
118
0
}
119
120
ErrorOr<Lexer::ReductionResult> Lexer::reduce_end()
121
637k
{
122
637k
    return ReductionResult {
123
637k
        .tokens = { Token::eof() },
124
637k
        .next_reduction = Reduction::None,
125
637k
    };
126
637k
}
127
128
Lexer::HeredocKeyResult Lexer::process_heredoc_key(Token const& token)
129
318k
{
130
318k
    StringBuilder builder;
131
318k
    enum ParseState {
132
318k
        Free,
133
318k
        InDoubleQuotes,
134
318k
        InSingleQuotes,
135
318k
    };
136
318k
    Vector<ParseState, 4> parse_state;
137
318k
    parse_state.append(Free);
138
318k
    bool escaped = false;
139
318k
    bool had_a_single_quote_segment = false;
140
141
11.4M
    for (auto byte : token.value.bytes()) {
142
11.4M
        switch (parse_state.last()) {
143
6.83M
        case Free:
144
6.83M
            switch (byte) {
145
42
            case '"':
146
42
                if (escaped) {
147
6
                    builder.append(byte);
148
6
                    escaped = false;
149
36
                } else {
150
36
                    parse_state.append(InDoubleQuotes);
151
36
                }
152
42
                break;
153
36
            case '\'':
154
36
                if (escaped) {
155
18
                    builder.append(byte);
156
18
                    escaped = false;
157
18
                } else {
158
18
                    had_a_single_quote_segment = true;
159
18
                    parse_state.append(InSingleQuotes);
160
18
                }
161
36
                break;
162
2.31k
            case '\\':
163
2.31k
                if (escaped) {
164
1.14k
                    builder.append(byte);
165
1.14k
                    escaped = false;
166
1.17k
                } else {
167
1.17k
                    escaped = true;
168
1.17k
                }
169
2.31k
                break;
170
6.83M
            default:
171
                // NOTE: bash eats the backslash outside quotes :shrug:
172
6.83M
                if (escaped && parse_state.last() != Free) {
173
0
                    builder.append('\\');
174
0
                    escaped = false;
175
0
                }
176
6.83M
                builder.append(byte);
177
6.83M
                break;
178
6.83M
            }
179
6.83M
            break;
180
6.83M
        case InDoubleQuotes:
181
4.56M
            if (!escaped && byte == '"') {
182
2
                parse_state.take_last();
183
2
                break;
184
2
            }
185
4.56M
            if (escaped) {
186
2.01M
                if (byte != '"')
187
2.01M
                    builder.append('\\');
188
2.01M
                builder.append(byte);
189
2.01M
                break;
190
2.01M
            }
191
2.55M
            if (byte == '\\')
192
34
                escaped = true;
193
2.55M
            else
194
2.55M
                builder.append(byte);
195
2.55M
            break;
196
85.2k
        case InSingleQuotes:
197
85.2k
            if (byte == '\'') {
198
0
                parse_state.take_last();
199
0
                break;
200
0
            }
201
85.2k
            builder.append(byte);
202
85.2k
            break;
203
11.4M
        }
204
11.4M
    }
205
206
    // NOTE: Not checking the final state as any garbage that even partially parses is allowed to be used as a key :/
207
208
318k
    return {
209
318k
        .key = builder.to_string().release_value_but_fixme_should_propagate_errors(),
210
318k
        .allow_interpolation = !had_a_single_quote_segment,
211
318k
    };
212
318k
}
213
214
ErrorOr<Lexer::ReductionResult> Lexer::reduce_operator()
215
4.47M
{
216
4.47M
    if (m_lexer.is_eof()) {
217
4.33k
        if (is_operator(m_state.buffer.string_view())) {
218
4.33k
            auto tokens = TRY(Token::operators_from(m_state));
219
4.33k
            m_state.buffer.clear();
220
4.33k
            m_state.position.start_offset = m_state.position.end_offset;
221
4.33k
            m_state.position.start_line = m_state.position.end_line;
222
223
4.33k
            return ReductionResult {
224
4.33k
                .tokens = move(tokens),
225
4.33k
                .next_reduction = Reduction::End,
226
4.33k
            };
227
4.33k
        }
228
229
0
        return reduce(Reduction::Start);
230
4.33k
    }
231
232
4.47M
    if (is_part_of_operator(m_state.buffer.string_view(), m_lexer.peek())) {
233
720k
        m_state.buffer.append(consume());
234
720k
        return ReductionResult {
235
720k
            .tokens = {},
236
720k
            .next_reduction = Reduction::Operator,
237
720k
        };
238
720k
    }
239
240
3.75M
    auto tokens = Vector<Token> {};
241
3.75M
    if (is_operator(m_state.buffer.string_view())) {
242
3.75M
        tokens.extend(TRY(Token::operators_from(m_state)));
243
3.75M
        m_state.buffer.clear();
244
3.75M
        m_state.position.start_offset = m_state.position.end_offset;
245
3.75M
        m_state.position.start_line = m_state.position.end_line;
246
3.75M
    }
247
248
3.75M
    auto expect_heredoc_entry = !tokens.is_empty() && (tokens.last().type == Token::Type::DoubleLessDash || tokens.last().type == Token::Type::DoubleLess);
249
250
3.75M
    auto result = TRY(reduce(Reduction::Start));
251
3.75M
    tokens.extend(move(result.tokens));
252
253
12.7M
    while (expect_heredoc_entry && tokens.size() == 1 && result.next_reduction != Reduction::None) {
254
8.96M
        result = TRY(reduce(result.next_reduction));
255
8.96M
        tokens.extend(move(result.tokens));
256
8.96M
    }
257
258
3.75M
    if (expect_heredoc_entry && tokens.size() > 1) {
259
188k
        auto [key, interpolation] = process_heredoc_key(tokens[1]);
260
188k
        m_state.heredoc_entries.append(HeredocEntry {
261
188k
            .key = key,
262
188k
            .allow_interpolation = interpolation,
263
188k
            .dedent = tokens[0].type == Token::Type::DoubleLessDash,
264
188k
        });
265
188k
    }
266
267
3.75M
    return ReductionResult {
268
3.75M
        .tokens = move(tokens),
269
3.75M
        .next_reduction = result.next_reduction,
270
3.75M
    };
271
3.75M
}
272
273
ErrorOr<Lexer::ReductionResult> Lexer::reduce_comment()
274
6.07M
{
275
6.07M
    if (m_lexer.is_eof()) {
276
796
        return ReductionResult {
277
796
            .tokens = {},
278
796
            .next_reduction = Reduction::End,
279
796
        };
280
796
    }
281
282
6.07M
    if (consume() == '\n') {
283
6.45k
        m_state.on_new_line = true;
284
6.45k
        return ReductionResult {
285
6.45k
            .tokens = { Token::newline() },
286
6.45k
            .next_reduction = Reduction::Start,
287
6.45k
        };
288
6.45k
    }
289
290
6.06M
    return ReductionResult {
291
6.06M
        .tokens = {},
292
6.06M
        .next_reduction = Reduction::Comment,
293
6.06M
    };
294
6.07M
}
295
296
ErrorOr<Lexer::ReductionResult> Lexer::reduce_single_quoted_string()
297
10.4M
{
298
10.4M
    if (m_lexer.is_eof()) {
299
458
        auto tokens = TRY(Token::maybe_from_state(m_state));
300
451
        tokens.append(Token::continuation('\''));
301
451
        return ReductionResult {
302
451
            .tokens = move(tokens),
303
451
            .next_reduction = Reduction::End,
304
451
        };
305
458
    }
306
307
10.4M
    auto ch = consume();
308
10.4M
    m_state.buffer.append(ch);
309
310
10.4M
    if (ch == '\'') {
311
1.30k
        return ReductionResult {
312
1.30k
            .tokens = {},
313
1.30k
            .next_reduction = Reduction::Start,
314
1.30k
        };
315
1.30k
    }
316
317
10.4M
    return ReductionResult {
318
10.4M
        .tokens = {},
319
10.4M
        .next_reduction = Reduction::SingleQuotedString,
320
10.4M
    };
321
10.4M
}
322
323
ErrorOr<Lexer::ReductionResult> Lexer::reduce_double_quoted_string()
324
22.6M
{
325
22.6M
    m_state.previous_reduction = Reduction::DoubleQuotedString;
326
22.6M
    if (m_lexer.is_eof()) {
327
9.26k
        auto tokens = TRY(Token::maybe_from_state(m_state));
328
9.24k
        tokens.append(Token::continuation('"'));
329
9.24k
        return ReductionResult {
330
9.24k
            .tokens = move(tokens),
331
9.24k
            .next_reduction = Reduction::End,
332
9.24k
        };
333
9.26k
    }
334
335
22.6M
    auto ch = consume();
336
22.6M
    m_state.buffer.append(ch);
337
338
22.6M
    if (m_state.escaping) {
339
189k
        m_state.escaping = false;
340
341
189k
        return ReductionResult {
342
189k
            .tokens = {},
343
189k
            .next_reduction = Reduction::DoubleQuotedString,
344
189k
        };
345
189k
    }
346
347
22.4M
    switch (ch) {
348
189k
    case '\\':
349
189k
        m_state.escaping = true;
350
189k
        return ReductionResult {
351
189k
            .tokens = {},
352
189k
            .next_reduction = Reduction::DoubleQuotedString,
353
189k
        };
354
130k
    case '"':
355
130k
        m_state.previous_reduction = Reduction::Start;
356
130k
        return ReductionResult {
357
130k
            .tokens = {},
358
130k
            .next_reduction = Reduction::Start,
359
130k
        };
360
871k
    case '$':
361
871k
        if (m_lexer.next_is("("))
362
119k
            m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
363
751k
        else
364
751k
            m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
365
871k
        return ReductionResult {
366
871k
            .tokens = {},
367
871k
            .next_reduction = Reduction::Expansion,
368
871k
        };
369
603k
    case '`':
370
603k
        m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
371
603k
        return ReductionResult {
372
603k
            .tokens = {},
373
603k
            .next_reduction = Reduction::CommandExpansion,
374
603k
        };
375
20.6M
    default:
376
20.6M
        return ReductionResult {
377
20.6M
            .tokens = {},
378
20.6M
            .next_reduction = Reduction::DoubleQuotedString,
379
20.6M
        };
380
22.4M
    }
381
22.4M
}
382
383
ErrorOr<Lexer::ReductionResult> Lexer::reduce_expansion()
384
4.83M
{
385
4.83M
    if (m_lexer.is_eof())
386
115k
        return reduce(m_state.previous_reduction);
387
388
4.72M
    auto ch = m_lexer.peek();
389
390
4.72M
    switch (ch) {
391
13.0k
    case '{':
392
13.0k
        consume();
393
13.0k
        m_state.buffer.append(ch);
394
13.0k
        return ReductionResult {
395
13.0k
            .tokens = {},
396
13.0k
            .next_reduction = Reduction::ExtendedParameterExpansion,
397
13.0k
        };
398
181k
    case '(':
399
181k
        consume();
400
181k
        m_state.buffer.append(ch);
401
181k
        return ReductionResult {
402
181k
            .tokens = {},
403
181k
            .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
404
181k
        };
405
42.8k
    case 'a' ... 'z':
406
135k
    case 'A' ... 'Z':
407
186k
    case '_': {
408
186k
        consume();
409
186k
        m_state.buffer.append(ch);
410
186k
        auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
411
186k
        expansion.parameter.append(ch);
412
186k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
413
414
186k
        return ReductionResult {
415
186k
            .tokens = {},
416
186k
            .next_reduction = Reduction::ParameterExpansion,
417
186k
        };
418
135k
    }
419
1.01k
    case '0' ... '9':
420
6.18k
    case '-':
421
13.7k
    case '!':
422
19.1k
    case '@':
423
213k
    case '#':
424
221k
    case '?':
425
221k
    case '*':
426
730k
    case '$':
427
730k
        return reduce(Reduction::SpecialParameterExpansion);
428
3.61M
    default:
429
3.61M
        m_state.buffer.append(ch);
430
3.61M
        return reduce(m_state.previous_reduction);
431
4.72M
    }
432
4.72M
}
433
434
ErrorOr<Lexer::ReductionResult> Lexer::reduce_command_expansion()
435
14.1M
{
436
14.1M
    if (m_lexer.is_eof()) {
437
381
        auto& expansion = m_state.expansions.last().get<CommandExpansion>();
438
381
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
439
440
381
        return ReductionResult {
441
381
            .tokens = { Token::continuation('`') },
442
381
            .next_reduction = m_state.previous_reduction,
443
381
        };
444
381
    }
445
446
14.1M
    auto ch = consume();
447
448
14.1M
    if (!m_state.escaping && ch == '`') {
449
3.37M
        m_state.buffer.append(ch);
450
3.37M
        auto& expansion = m_state.expansions.last().get<CommandExpansion>();
451
3.37M
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
452
453
3.37M
        return ReductionResult {
454
3.37M
            .tokens = {},
455
3.37M
            .next_reduction = m_state.previous_reduction,
456
3.37M
        };
457
3.37M
    }
458
459
10.7M
    if (!m_state.escaping && ch == '\\') {
460
96.4k
        m_state.escaping = true;
461
96.4k
        return ReductionResult {
462
96.4k
            .tokens = {},
463
96.4k
            .next_reduction = Reduction::CommandExpansion,
464
96.4k
        };
465
96.4k
    }
466
467
10.6M
    m_state.escaping = false;
468
10.6M
    m_state.buffer.append(ch);
469
10.6M
    m_state.expansions.last().get<CommandExpansion>().command.append(ch);
470
10.6M
    return ReductionResult {
471
10.6M
        .tokens = {},
472
10.6M
        .next_reduction = Reduction::CommandExpansion,
473
10.6M
    };
474
10.7M
}
475
476
ErrorOr<Lexer::ReductionResult> Lexer::reduce_heredoc_contents()
477
2.54M
{
478
2.54M
    if (m_lexer.is_eof()) {
479
3.77k
        auto tokens = TRY(Token::maybe_from_state(m_state));
480
3.77k
        m_state.buffer.clear();
481
3.77k
        m_state.position.start_offset = m_state.position.end_offset;
482
3.77k
        m_state.position.start_line = m_state.position.end_line;
483
484
3.77k
        return ReductionResult {
485
3.77k
            .tokens = move(tokens),
486
3.77k
            .next_reduction = Reduction::End,
487
3.77k
        };
488
3.77k
    }
489
490
2.54M
    if (!m_state.escaping && consume_specific('\\')) {
491
6
        m_state.escaping = true;
492
6
        m_state.buffer.append('\\');
493
6
        return ReductionResult {
494
6
            .tokens = {},
495
6
            .next_reduction = Reduction::HeredocContents,
496
6
        };
497
6
    }
498
499
2.54M
    if (!m_state.escaping && consume_specific('$')) {
500
8.43k
        m_state.buffer.append('$');
501
8.43k
        if (m_lexer.next_is("("))
502
2.90k
            m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
503
5.52k
        else
504
5.52k
            m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
505
506
8.43k
        return ReductionResult {
507
8.43k
            .tokens = {},
508
8.43k
            .next_reduction = Reduction::Expansion,
509
8.43k
        };
510
8.43k
    }
511
512
2.53M
    if (!m_state.escaping && consume_specific('`')) {
513
4.03k
        m_state.buffer.append('`');
514
4.03k
        m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
515
4.03k
        return ReductionResult {
516
4.03k
            .tokens = {},
517
4.03k
            .next_reduction = Reduction::CommandExpansion,
518
4.03k
        };
519
4.03k
    }
520
521
2.52M
    m_state.escaping = false;
522
2.52M
    m_state.buffer.append(consume());
523
2.52M
    return ReductionResult {
524
2.52M
        .tokens = {},
525
2.52M
        .next_reduction = Reduction::HeredocContents,
526
2.52M
    };
527
2.53M
}
528
529
ErrorOr<Lexer::ReductionResult> Lexer::reduce_start()
530
59.3M
{
531
59.3M
    auto was_on_new_line = m_state.on_new_line;
532
59.3M
    m_state.on_new_line = false;
533
534
59.3M
    if (m_lexer.is_eof()) {
535
621k
        auto tokens = TRY(Token::maybe_from_state(m_state));
536
621k
        m_state.buffer.clear();
537
621k
        m_state.expansions.clear();
538
621k
        m_state.position.start_offset = m_state.position.end_offset;
539
621k
        m_state.position.start_line = m_state.position.end_line;
540
541
621k
        return ReductionResult {
542
621k
            .tokens = move(tokens),
543
621k
            .next_reduction = Reduction::End,
544
621k
        };
545
621k
    }
546
547
58.7M
    if (was_on_new_line && !m_state.heredoc_entries.is_empty()) {
548
2.44k
        auto const& entry = m_state.heredoc_entries.first();
549
550
2.44k
        auto start_index = m_lexer.tell();
551
2.44k
        Optional<size_t> end_index;
552
553
22.3M
        for (; !m_lexer.is_eof();) {
554
22.3M
            auto index = m_lexer.tell();
555
22.3M
            auto possible_end_index = m_lexer.tell();
556
22.3M
            if (m_lexer.consume_specific('\n')) {
557
1.52M
                if (entry.dedent)
558
121k
                    m_lexer.ignore_while(is_any_of("\t"sv));
559
1.52M
                if (m_lexer.consume_specific(entry.key.bytes_as_string_view())) {
560
25.8k
                    if (m_lexer.consume_specific('\n') || m_lexer.is_eof()) {
561
27
                        end_index = possible_end_index;
562
27
                        break;
563
27
                    }
564
25.8k
                }
565
1.52M
            }
566
22.3M
            if (m_lexer.tell() == index)
567
20.8M
                m_lexer.ignore();
568
22.3M
        }
569
570
2.44k
        auto contents = m_lexer.input().substring_view(start_index, end_index.value_or(m_lexer.tell()) - start_index);
571
2.44k
        reconsume(contents);
572
2.44k
        if (end_index.has_value())
573
27
            reconsume(m_lexer.input().substring_view_starting_after_substring(contents).substring_view(0, m_lexer.tell() - *end_index));
574
575
2.44k
        m_state.buffer.clear();
576
2.44k
        m_state.buffer.append(contents);
577
578
2.44k
        auto token = TRY(Token::maybe_from_state(m_state)).first();
579
2.41k
        token.relevant_heredoc_key = entry.key;
580
2.41k
        token.type = Token::Type::HeredocContents;
581
582
2.41k
        m_state.heredoc_entries.take_first();
583
584
2.41k
        m_state.on_new_line = true;
585
586
2.41k
        m_state.buffer.clear();
587
2.41k
        m_state.position.start_offset = m_state.position.end_offset;
588
2.41k
        m_state.position.start_line = m_state.position.end_line;
589
590
2.41k
        Vector<Token> tokens { move(token), Token::newline() };
591
592
2.41k
        return ReductionResult {
593
2.41k
            .tokens = move(tokens),
594
2.41k
            .next_reduction = Reduction::Start,
595
2.41k
        };
596
2.44k
    }
597
598
58.7M
    if (m_state.escaping && consume_specific('\n')) {
599
331
        m_state.escaping = false;
600
601
331
        auto buffer = m_state.buffer.to_byte_string().substring(0, m_state.buffer.length() - 1);
602
331
        m_state.buffer.clear();
603
331
        m_state.buffer.append(buffer);
604
605
331
        return ReductionResult {
606
331
            .tokens = {},
607
331
            .next_reduction = Reduction::Start,
608
331
        };
609
331
    }
610
611
58.7M
    if (!m_state.escaping && m_lexer.peek() == '#' && m_state.buffer.is_empty()) {
612
7.24k
        consume();
613
7.24k
        return ReductionResult {
614
7.24k
            .tokens = {},
615
7.24k
            .next_reduction = Reduction::Comment,
616
7.24k
        };
617
7.24k
    }
618
619
58.7M
    if (!m_state.escaping && consume_specific('\n')) {
620
1.31M
        auto tokens = TRY(Token::maybe_from_state(m_state));
621
1.31M
        tokens.append(Token::newline());
622
623
1.31M
        m_state.on_new_line = true;
624
625
1.31M
        m_state.buffer.clear();
626
1.31M
        m_state.expansions.clear();
627
1.31M
        m_state.position.start_offset = m_state.position.end_offset;
628
1.31M
        m_state.position.start_line = m_state.position.end_line;
629
630
1.31M
        return ReductionResult {
631
1.31M
            .tokens = move(tokens),
632
1.31M
            .next_reduction = Reduction::Start,
633
1.31M
        };
634
1.31M
    }
635
636
57.4M
    if (!m_state.escaping && consume_specific('\\')) {
637
24.7k
        m_state.escaping = true;
638
24.7k
        m_state.buffer.append('\\');
639
24.7k
        return ReductionResult {
640
24.7k
            .tokens = {},
641
24.7k
            .next_reduction = Reduction::Start,
642
24.7k
        };
643
24.7k
    }
644
645
57.3M
    if (!m_state.escaping && consume_specific('\'')) {
646
1.76k
        m_state.buffer.append('\'');
647
1.76k
        return ReductionResult {
648
1.76k
            .tokens = {},
649
1.76k
            .next_reduction = Reduction::SingleQuotedString,
650
1.76k
        };
651
1.76k
    }
652
653
57.3M
    if (!m_state.escaping && consume_specific('"')) {
654
129k
        m_state.buffer.append('"');
655
129k
        return ReductionResult {
656
129k
            .tokens = {},
657
129k
            .next_reduction = Reduction::DoubleQuotedString,
658
129k
        };
659
129k
    }
660
661
57.2M
    if (!m_state.escaping && is_ascii_space(m_lexer.peek())) {
662
2.08M
        consume();
663
2.08M
        auto tokens = TRY(Token::maybe_from_state(m_state));
664
2.08M
        m_state.buffer.clear();
665
2.08M
        m_state.expansions.clear();
666
2.08M
        m_state.position.start_offset = m_state.position.end_offset;
667
2.08M
        m_state.position.start_line = m_state.position.end_line;
668
669
2.08M
        return ReductionResult {
670
2.08M
            .tokens = move(tokens),
671
2.08M
            .next_reduction = Reduction::Start,
672
2.08M
        };
673
2.08M
    }
674
675
55.1M
    if (!m_state.escaping && consume_specific('$')) {
676
3.95M
        m_state.buffer.append('$');
677
3.95M
        if (m_lexer.next_is("("))
678
58.2k
            m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
679
3.90M
        else
680
3.90M
            m_state.expansions.empend(ParameterExpansion { .parameter = StringBuilder {}, .range = range(-1) });
681
682
3.95M
        return ReductionResult {
683
3.95M
            .tokens = {},
684
3.95M
            .next_reduction = Reduction::Expansion,
685
3.95M
        };
686
3.95M
    }
687
688
51.2M
    if (!m_state.escaping && consume_specific('`')) {
689
2.77M
        m_state.buffer.append('`');
690
2.77M
        m_state.expansions.empend(CommandExpansion { .command = StringBuilder {}, .range = range(-1) });
691
2.77M
        return ReductionResult {
692
2.77M
            .tokens = {},
693
2.77M
            .next_reduction = Reduction::CommandExpansion,
694
2.77M
        };
695
2.77M
    }
696
697
48.4M
    if (!m_state.escaping && m_state.in_skip_mode && is_any_of("})"sv)(m_lexer.peek())) {
698
        // That's an eof for us.
699
74.1k
        return ReductionResult {
700
74.1k
            .tokens = {},
701
74.1k
            .next_reduction = Reduction::None,
702
74.1k
        };
703
74.1k
    }
704
705
48.3M
    if (!m_state.escaping && is_part_of_operator(""sv, m_lexer.peek())) {
706
3.76M
        auto tokens = TRY(Token::maybe_from_state(m_state));
707
3.76M
        m_state.buffer.clear();
708
3.76M
        m_state.buffer.append(consume());
709
3.76M
        m_state.expansions.clear();
710
3.76M
        m_state.position.start_offset = m_state.position.end_offset;
711
3.76M
        m_state.position.start_line = m_state.position.end_line;
712
713
3.76M
        return ReductionResult {
714
3.76M
            .tokens = move(tokens),
715
3.76M
            .next_reduction = Reduction::Operator,
716
3.76M
        };
717
3.76M
    }
718
719
44.5M
    m_state.escaping = false;
720
44.5M
    m_state.buffer.append(consume());
721
44.5M
    return ReductionResult {
722
44.5M
        .tokens = {},
723
44.5M
        .next_reduction = Reduction::Start,
724
44.5M
    };
725
48.3M
}
726
727
ErrorOr<Lexer::ReductionResult> Lexer::reduce_arithmetic_expansion()
728
1.96M
{
729
1.96M
    if (m_lexer.is_eof()) {
730
13
        auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
731
13
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
732
733
13
        return ReductionResult {
734
13
            .tokens = { Token::continuation("$(("_string) },
735
13
            .next_reduction = m_state.previous_reduction,
736
13
        };
737
13
    }
738
739
1.96M
    if (m_lexer.peek() == ')' && m_state.buffer.string_view().ends_with(')')) {
740
111k
        m_state.buffer.append(consume());
741
111k
        auto& expansion = m_state.expansions.last().get<ArithmeticExpansion>();
742
111k
        expansion.expression = TRY(String::from_utf8(expansion.value.string_view().substring_view(0, expansion.value.length() - 1)));
743
111k
        expansion.value.clear();
744
111k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
745
746
111k
        return ReductionResult {
747
111k
            .tokens = {},
748
111k
            .next_reduction = m_state.previous_reduction,
749
111k
        };
750
111k
    }
751
752
1.85M
    auto ch = consume();
753
1.85M
    m_state.buffer.append(ch);
754
1.85M
    m_state.expansions.last().get<ArithmeticExpansion>().value.append(ch);
755
1.85M
    return ReductionResult {
756
1.85M
        .tokens = {},
757
1.85M
        .next_reduction = Reduction::ArithmeticExpansion,
758
1.85M
    };
759
1.96M
}
760
761
ErrorOr<Lexer::ReductionResult> Lexer::reduce_special_parameter_expansion()
762
730k
{
763
730k
    auto ch = consume();
764
730k
    m_state.buffer.append(ch);
765
730k
    m_state.expansions.last() = ParameterExpansion {
766
730k
        .parameter = StringBuilder {},
767
730k
        .range = range(-2),
768
730k
    };
769
730k
    auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
770
730k
    expansion.parameter.append(ch);
771
730k
    expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
772
773
730k
    return ReductionResult {
774
730k
        .tokens = {},
775
730k
        .next_reduction = m_state.previous_reduction,
776
730k
    };
777
730k
}
778
779
ErrorOr<Lexer::ReductionResult> Lexer::reduce_parameter_expansion()
780
2.39M
{
781
2.39M
    auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
782
783
2.39M
    if (m_lexer.is_eof()) {
784
1.54k
        return ReductionResult {
785
1.54k
            .tokens = {},
786
1.54k
            .next_reduction = Reduction::Start,
787
1.54k
        };
788
1.54k
    }
789
790
2.39M
    auto next = m_lexer.peek();
791
2.39M
    if (is_ascii_alphanumeric(next) || next == '_') {
792
2.20M
        m_state.buffer.append(consume());
793
2.20M
        expansion.parameter.append(next);
794
2.20M
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
795
796
2.20M
        return ReductionResult {
797
2.20M
            .tokens = {},
798
2.20M
            .next_reduction = Reduction::ParameterExpansion,
799
2.20M
        };
800
2.20M
    }
801
802
184k
    return reduce(m_state.previous_reduction);
803
2.39M
}
804
805
ErrorOr<Lexer::ReductionResult> Lexer::reduce_command_or_arithmetic_substitution_expansion()
806
197k
{
807
197k
    auto ch = m_lexer.peek();
808
197k
    if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) {
809
111k
        m_state.buffer.append(consume());
810
111k
        m_state.expansions.last() = ArithmeticExpansion {
811
111k
            .expression = {},
812
111k
            .value = StringBuilder {},
813
111k
            .range = range(-2)
814
111k
        };
815
111k
        return ReductionResult {
816
111k
            .tokens = {},
817
111k
            .next_reduction = Reduction::ArithmeticExpansion,
818
111k
        };
819
111k
    }
820
821
85.9k
    auto saved_position = m_state.position;
822
85.9k
    {
823
85.9k
        auto skip_mode = switch_to_skip_mode();
824
825
85.9k
        auto next_reduction = Reduction::Start;
826
23.0M
        do {
827
23.0M
            auto result = TRY(reduce(next_reduction));
828
23.0M
            next_reduction = result.next_reduction;
829
23.0M
        } while (next_reduction != Reduction::None);
830
85.9k
        saved_position = m_state.position;
831
85.7k
    }
832
833
85.7k
    auto const skipped_text = m_lexer.input().substring_view(m_state.position.end_offset, saved_position.end_offset - m_state.position.end_offset);
834
85.7k
    m_state.position.end_offset = saved_position.end_offset;
835
85.7k
    m_state.position.end_line = saved_position.end_line;
836
837
85.7k
    m_state.buffer.append(skipped_text);
838
85.7k
    m_state.expansions.last().get<CommandExpansion>().command.append(skipped_text);
839
85.7k
    m_state.expansions.last().visit([&](auto& expansion) {
840
85.7k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
841
85.7k
    });
Unexecuted instantiation: PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_0::operator()<Shell::Posix::ParameterExpansion>(Shell::Posix::ParameterExpansion&) const
PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_0::operator()<Shell::Posix::CommandExpansion>(Shell::Posix::CommandExpansion&) const
Line
Count
Source
839
85.7k
    m_state.expansions.last().visit([&](auto& expansion) {
840
85.7k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
841
85.7k
    });
Unexecuted instantiation: PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_0::operator()<Shell::Posix::ArithmeticExpansion>(Shell::Posix::ArithmeticExpansion&) const
842
843
85.7k
    if (m_lexer.is_eof()) {
844
11.5k
        return ReductionResult {
845
11.5k
            .tokens = { Token::continuation("$("_string) },
846
11.5k
            .next_reduction = m_state.previous_reduction,
847
11.5k
        };
848
11.5k
    }
849
850
74.1k
    ch = m_lexer.peek();
851
74.1k
    if (ch == '(' && m_state.buffer.string_view().ends_with("$("sv)) {
852
0
        m_state.buffer.append(consume());
853
0
        m_state.expansions.last() = ArithmeticExpansion {
854
0
            .expression = {},
855
0
            .value = m_state.expansions.last().get<CommandExpansion>().command,
856
0
            .range = range(-2)
857
0
        };
858
0
        return ReductionResult {
859
0
            .tokens = {},
860
0
            .next_reduction = Reduction::ArithmeticExpansion,
861
0
        };
862
0
    }
863
864
74.1k
    if (ch == ')') {
865
57.7k
        m_state.buffer.append(consume());
866
57.7k
        m_state.expansions.last().visit([&](auto& expansion) {
867
57.7k
            expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
868
57.7k
        });
Unexecuted instantiation: PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_1::operator()<Shell::Posix::ParameterExpansion>(Shell::Posix::ParameterExpansion&) const
PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_1::operator()<Shell::Posix::CommandExpansion>(Shell::Posix::CommandExpansion&) const
Line
Count
Source
866
57.7k
        m_state.expansions.last().visit([&](auto& expansion) {
867
57.7k
            expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
868
57.7k
        });
Unexecuted instantiation: PosixLexer.cpp:auto Shell::Posix::Lexer::reduce_command_or_arithmetic_substitution_expansion()::$_1::operator()<Shell::Posix::ArithmeticExpansion>(Shell::Posix::ArithmeticExpansion&) const
869
57.7k
        return ReductionResult {
870
57.7k
            .tokens = {},
871
57.7k
            .next_reduction = m_state.previous_reduction,
872
57.7k
        };
873
57.7k
    }
874
875
16.3k
    m_state.buffer.append(consume());
876
16.3k
    m_state.expansions.last().get<CommandExpansion>().command.append(ch);
877
16.3k
    return ReductionResult {
878
16.3k
        .tokens = {},
879
16.3k
        .next_reduction = Reduction::CommandOrArithmeticSubstitutionExpansion,
880
16.3k
    };
881
74.1k
}
882
883
ErrorOr<Lexer::ReductionResult> Lexer::reduce_extended_parameter_expansion()
884
7.45M
{
885
7.45M
    auto& expansion = m_state.expansions.last().get<ParameterExpansion>();
886
887
7.45M
    if (m_lexer.is_eof()) {
888
646
        return ReductionResult {
889
646
            .tokens = { Token::continuation("${"_string) },
890
646
            .next_reduction = m_state.previous_reduction,
891
646
        };
892
646
    }
893
894
7.45M
    auto ch = m_lexer.peek();
895
7.45M
    if (ch == '}') {
896
12.4k
        m_state.buffer.append(consume());
897
12.4k
        expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
898
899
12.4k
        return ReductionResult {
900
12.4k
            .tokens = {},
901
12.4k
            .next_reduction = m_state.previous_reduction,
902
12.4k
        };
903
12.4k
    }
904
905
7.44M
    m_state.buffer.append(consume());
906
7.44M
    expansion.parameter.append(ch);
907
7.44M
    expansion.range.length = m_state.position.end_offset - expansion.range.start - m_state.position.start_offset;
908
909
7.44M
    return ReductionResult {
910
7.44M
        .tokens = {},
911
7.44M
        .next_reduction = Reduction::ExtendedParameterExpansion,
912
7.44M
    };
913
7.45M
}
914
915
StringView Token::type_name() const
916
1.64M
{
917
1.64M
    switch (type) {
918
9.70k
    case Type::Eof:
919
9.70k
        return "Eof"sv;
920
57.6k
    case Type::Newline:
921
57.6k
        return "Newline"sv;
922
1.53k
    case Type::Continuation:
923
1.53k
        return "Continuation"sv;
924
0
    case Type::Token:
925
0
        return "Token"sv;
926
96.4k
    case Type::And:
927
96.4k
        return "And"sv;
928
362k
    case Type::Pipe:
929
362k
        return "Pipe"sv;
930
13.7k
    case Type::OpenParen:
931
13.7k
        return "OpenParen"sv;
932
576k
    case Type::CloseParen:
933
576k
        return "CloseParen"sv;
934
452
    case Type::Great:
935
452
        return "Great"sv;
936
35
    case Type::Less:
937
35
        return "Less"sv;
938
176
    case Type::AndIf:
939
176
        return "AndIf"sv;
940
321k
    case Type::OrIf:
941
321k
        return "OrIf"sv;
942
181
    case Type::DoubleSemicolon:
943
181
        return "DoubleSemicolon"sv;
944
13
    case Type::DoubleLess:
945
13
        return "DoubleLess"sv;
946
2
    case Type::DoubleGreat:
947
2
        return "DoubleGreat"sv;
948
43.8k
    case Type::LessAnd:
949
43.8k
        return "LessAnd"sv;
950
17.9k
    case Type::GreatAnd:
951
17.9k
        return "GreatAnd"sv;
952
0
    case Type::LessGreat:
953
0
        return "LessGreat"sv;
954
0
    case Type::DoubleLessDash:
955
0
        return "DoubleLessDash"sv;
956
2
    case Type::Clobber:
957
2
        return "Clobber"sv;
958
103k
    case Type::Semicolon:
959
103k
        return "Semicolon"sv;
960
0
    case Type::HeredocContents:
961
0
        return "HeredocContents"sv;
962
709
    case Type::AssignmentWord:
963
709
        return "AssignmentWord"sv;
964
33.3k
    case Type::ListAssignmentWord:
965
33.3k
        return "ListAssignmentWord"sv;
966
0
    case Type::Bang:
967
0
        return "Bang"sv;
968
62
    case Type::Case:
969
62
        return "Case"sv;
970
61
    case Type::CloseBrace:
971
61
        return "CloseBrace"sv;
972
336
    case Type::Do:
973
336
        return "Do"sv;
974
445
    case Type::Done:
975
445
        return "Done"sv;
976
330
    case Type::Elif:
977
330
        return "Elif"sv;
978
0
    case Type::Else:
979
0
        return "Else"sv;
980
0
    case Type::Esac:
981
0
        return "Esac"sv;
982
143
    case Type::Fi:
983
143
        return "Fi"sv;
984
28
    case Type::For:
985
28
        return "For"sv;
986
32
    case Type::If:
987
32
        return "If"sv;
988
108
    case Type::In:
989
108
        return "In"sv;
990
507
    case Type::IoNumber:
991
507
        return "IoNumber"sv;
992
713
    case Type::OpenBrace:
993
713
        return "OpenBrace"sv;
994
0
    case Type::Then:
995
0
        return "Then"sv;
996
0
    case Type::Until:
997
0
        return "Until"sv;
998
1.73k
    case Type::VariableName:
999
1.73k
        return "VariableName"sv;
1000
0
    case Type::While:
1001
0
        return "While"sv;
1002
715
    case Type::Word:
1003
715
        return "Word"sv;
1004
1.64M
    }
1005
0
    return "Idk"sv;
1006
1.64M
}
1007
1008
}