Coverage Report

Created: 2025-03-04 07:22

/src/serenity/Userland/Libraries/LibJS/SourceCode.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2022-2023, Andreas Kling <kling@serenityos.org>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include <AK/BinarySearch.h>
8
#include <AK/Utf8View.h>
9
#include <LibJS/SourceCode.h>
10
#include <LibJS/SourceRange.h>
11
#include <LibJS/Token.h>
12
13
namespace JS {
14
15
NonnullRefPtr<SourceCode const> SourceCode::create(String filename, String code)
16
504
{
17
504
    return adopt_ref(*new SourceCode(move(filename), move(code)));
18
504
}
19
20
SourceCode::SourceCode(String filename, String code)
21
504
    : m_filename(move(filename))
22
504
    , m_code(move(code))
23
504
{
24
504
}
25
26
String const& SourceCode::filename() const
27
0
{
28
0
    return m_filename;
29
0
}
30
31
String const& SourceCode::code() const
32
0
{
33
0
    return m_code;
34
0
}
35
36
void SourceCode::fill_position_cache() const
37
4
{
38
4
    constexpr size_t predicted_mimimum_cached_positions = 8;
39
4
    constexpr size_t minimum_distance_between_cached_positions = 32;
40
4
    constexpr size_t maximum_distance_between_cached_positions = 8192;
41
42
4
    if (m_code.is_empty())
43
0
        return;
44
45
4
    u32 previous_code_point = 0;
46
4
    size_t line = 1;
47
4
    size_t column = 1;
48
4
    size_t offset_of_last_starting_point = 0;
49
4
    m_cached_positions.ensure_capacity(predicted_mimimum_cached_positions + m_code.bytes().size() / maximum_distance_between_cached_positions);
50
4
    m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 });
51
52
4
    Utf8View const view(m_code);
53
2.39M
    for (auto it = view.begin(); it != view.end(); ++it) {
54
2.39M
        u32 code_point = *it;
55
2.39M
        bool is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
56
57
2.39M
        auto byte_offset = view.byte_offset_of(it);
58
59
2.39M
        bool is_nonempty_line = is_line_terminator && previous_code_point != '\n' && previous_code_point != LINE_SEPARATOR && previous_code_point != PARAGRAPH_SEPARATOR && (code_point == '\n' || previous_code_point != '\r');
60
2.39M
        auto distance_between_cached_position = byte_offset - offset_of_last_starting_point;
61
62
2.39M
        if ((distance_between_cached_position >= minimum_distance_between_cached_positions && is_nonempty_line) || distance_between_cached_position >= maximum_distance_between_cached_positions) {
63
327
            m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset });
64
327
            offset_of_last_starting_point = byte_offset;
65
327
        }
66
67
2.39M
        if (is_line_terminator) {
68
87
            line += 1;
69
87
            column = 1;
70
2.39M
        } else {
71
2.39M
            column += 1;
72
2.39M
        }
73
74
2.39M
        previous_code_point = code_point;
75
2.39M
    }
76
4
}
77
78
SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const
79
3.30k
{
80
    // If the underlying code is an empty string, the range is 1,1 - 1,1 no matter what.
81
3.30k
    if (m_code.is_empty())
82
0
        return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } };
83
84
3.30k
    if (m_cached_positions.is_empty())
85
4
        fill_position_cache();
86
87
3.30k
    Position current { .line = 1, .column = 1, .offset = 0 };
88
89
3.30k
    if (!m_cached_positions.is_empty()) {
90
3.30k
        Position const dummy;
91
3.30k
        size_t nearest_index = 0;
92
3.30k
        binary_search(m_cached_positions, dummy, &nearest_index,
93
22.4k
            [&](auto&, auto& starting_point) {
94
22.4k
                return start_offset - starting_point.offset;
95
22.4k
            });
96
97
3.30k
        current = m_cached_positions[nearest_index];
98
3.30k
    }
99
100
3.30k
    Optional<Position> start;
101
3.30k
    Optional<Position> end;
102
103
3.30k
    u32 previous_code_point = 0;
104
105
3.30k
    Utf8View const view(m_code);
106
73.8M
    for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) {
107
108
        // If we're on or after the start offset, this is the start position.
109
73.8M
        if (!start.has_value() && view.byte_offset_of(it) >= start_offset) {
110
3.30k
            start = Position {
111
3.30k
                .line = current.line,
112
3.30k
                .column = current.column,
113
3.30k
                .offset = start_offset,
114
3.30k
            };
115
3.30k
        }
116
117
        // If we're on or after the end offset, this is the end position.
118
73.8M
        if (!end.has_value() && view.byte_offset_of(it) >= end_offset) {
119
3.05k
            end = Position {
120
3.05k
                .line = current.line,
121
3.05k
                .column = current.column,
122
3.05k
                .offset = end_offset,
123
3.05k
            };
124
3.05k
            break;
125
3.05k
        }
126
127
73.8M
        u32 code_point = *it;
128
129
73.8M
        bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
130
73.8M
        previous_code_point = code_point;
131
132
73.8M
        if (is_line_terminator) {
133
6.93k
            current.line += 1;
134
6.93k
            current.column = 1;
135
6.93k
            continue;
136
6.93k
        }
137
73.8M
        current.column += 1;
138
73.8M
    }
139
140
    // If we didn't find both a start and end position, just return 1,1-1,1.
141
    // FIXME: This is a hack. Find a way to return the nicest possible values here.
142
3.30k
    if (!start.has_value() || !end.has_value())
143
251
        return SourceRange { *this, { .line = 1, .column = 1 }, { .line = 1, .column = 1 } };
144
145
3.05k
    return SourceRange { *this, *start, *end };
146
3.30k
}
147
148
}