/src/serenity/Userland/Libraries/LibJS/SourceCode.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2022-2023, Andreas Kling <kling@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/BinarySearch.h> |
8 | | #include <AK/Utf8View.h> |
9 | | #include <LibJS/SourceCode.h> |
10 | | #include <LibJS/SourceRange.h> |
11 | | #include <LibJS/Token.h> |
12 | | |
13 | | namespace JS { |
14 | | |
15 | | NonnullRefPtr<SourceCode const> SourceCode::create(String filename, String code) |
16 | 504 | { |
17 | 504 | return adopt_ref(*new SourceCode(move(filename), move(code))); |
18 | 504 | } |
19 | | |
20 | | SourceCode::SourceCode(String filename, String code) |
21 | 504 | : m_filename(move(filename)) |
22 | 504 | , m_code(move(code)) |
23 | 504 | { |
24 | 504 | } |
25 | | |
26 | | String const& SourceCode::filename() const |
27 | 0 | { |
28 | 0 | return m_filename; |
29 | 0 | } |
30 | | |
31 | | String const& SourceCode::code() const |
32 | 0 | { |
33 | 0 | return m_code; |
34 | 0 | } |
35 | | |
36 | | void SourceCode::fill_position_cache() const |
37 | 4 | { |
38 | 4 | constexpr size_t predicted_mimimum_cached_positions = 8; |
39 | 4 | constexpr size_t minimum_distance_between_cached_positions = 32; |
40 | 4 | constexpr size_t maximum_distance_between_cached_positions = 8192; |
41 | | |
42 | 4 | if (m_code.is_empty()) |
43 | 0 | return; |
44 | | |
45 | 4 | u32 previous_code_point = 0; |
46 | 4 | size_t line = 1; |
47 | 4 | size_t column = 1; |
48 | 4 | size_t offset_of_last_starting_point = 0; |
49 | 4 | m_cached_positions.ensure_capacity(predicted_mimimum_cached_positions + m_code.bytes().size() / maximum_distance_between_cached_positions); |
50 | 4 | m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 }); |
51 | | |
52 | 4 | Utf8View const view(m_code); |
53 | 2.39M | for (auto it = view.begin(); it != view.end(); ++it) { |
54 | 2.39M | u32 code_point = *it; |
55 | 2.39M | bool is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR; |
56 | | |
57 | 2.39M | auto byte_offset = view.byte_offset_of(it); |
58 | | |
59 | 2.39M | bool is_nonempty_line = is_line_terminator && previous_code_point != '\n' && previous_code_point != LINE_SEPARATOR && previous_code_point != PARAGRAPH_SEPARATOR && (code_point == '\n' || previous_code_point != '\r'); |
60 | 2.39M | auto distance_between_cached_position = byte_offset - offset_of_last_starting_point; |
61 | | |
62 | 2.39M | if ((distance_between_cached_position >= minimum_distance_between_cached_positions && is_nonempty_line) || distance_between_cached_position >= maximum_distance_between_cached_positions) { |
63 | 327 | m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset }); |
64 | 327 | offset_of_last_starting_point = byte_offset; |
65 | 327 | } |
66 | | |
67 | 2.39M | if (is_line_terminator) { |
68 | 87 | line += 1; |
69 | 87 | column = 1; |
70 | 2.39M | } else { |
71 | 2.39M | column += 1; |
72 | 2.39M | } |
73 | | |
74 | 2.39M | previous_code_point = code_point; |
75 | 2.39M | } |
76 | 4 | } |
77 | | |
78 | | SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) const |
79 | 3.30k | { |
80 | | // If the underlying code is an empty string, the range is 1,1 - 1,1 no matter what. |
81 | 3.30k | if (m_code.is_empty()) |
82 | 0 | return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } }; |
83 | | |
84 | 3.30k | if (m_cached_positions.is_empty()) |
85 | 4 | fill_position_cache(); |
86 | | |
87 | 3.30k | Position current { .line = 1, .column = 1, .offset = 0 }; |
88 | | |
89 | 3.30k | if (!m_cached_positions.is_empty()) { |
90 | 3.30k | Position const dummy; |
91 | 3.30k | size_t nearest_index = 0; |
92 | 3.30k | binary_search(m_cached_positions, dummy, &nearest_index, |
93 | 22.4k | [&](auto&, auto& starting_point) { |
94 | 22.4k | return start_offset - starting_point.offset; |
95 | 22.4k | }); |
96 | | |
97 | 3.30k | current = m_cached_positions[nearest_index]; |
98 | 3.30k | } |
99 | | |
100 | 3.30k | Optional<Position> start; |
101 | 3.30k | Optional<Position> end; |
102 | | |
103 | 3.30k | u32 previous_code_point = 0; |
104 | | |
105 | 3.30k | Utf8View const view(m_code); |
106 | 73.8M | for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) { |
107 | | |
108 | | // If we're on or after the start offset, this is the start position. |
109 | 73.8M | if (!start.has_value() && view.byte_offset_of(it) >= start_offset) { |
110 | 3.30k | start = Position { |
111 | 3.30k | .line = current.line, |
112 | 3.30k | .column = current.column, |
113 | 3.30k | .offset = start_offset, |
114 | 3.30k | }; |
115 | 3.30k | } |
116 | | |
117 | | // If we're on or after the end offset, this is the end position. |
118 | 73.8M | if (!end.has_value() && view.byte_offset_of(it) >= end_offset) { |
119 | 3.05k | end = Position { |
120 | 3.05k | .line = current.line, |
121 | 3.05k | .column = current.column, |
122 | 3.05k | .offset = end_offset, |
123 | 3.05k | }; |
124 | 3.05k | break; |
125 | 3.05k | } |
126 | | |
127 | 73.8M | u32 code_point = *it; |
128 | | |
129 | 73.8M | bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR; |
130 | 73.8M | previous_code_point = code_point; |
131 | | |
132 | 73.8M | if (is_line_terminator) { |
133 | 6.93k | current.line += 1; |
134 | 6.93k | current.column = 1; |
135 | 6.93k | continue; |
136 | 6.93k | } |
137 | 73.8M | current.column += 1; |
138 | 73.8M | } |
139 | | |
140 | | // If we didn't find both a start and end position, just return 1,1-1,1. |
141 | | // FIXME: This is a hack. Find a way to return the nicest possible values here. |
142 | 3.30k | if (!start.has_value() || !end.has_value()) |
143 | 251 | return SourceRange { *this, { .line = 1, .column = 1 }, { .line = 1, .column = 1 } }; |
144 | | |
145 | 3.05k | return SourceRange { *this, *start, *end }; |
146 | 3.30k | } |
147 | | |
148 | | } |