/src/llvm-project/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- LRGraph.cpp - -------------------------------------------*- C++-*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "clang-pseudo/grammar/LRGraph.h" |
10 | | #include "clang-pseudo/grammar/Grammar.h" |
11 | | #include "llvm/ADT/DenseSet.h" |
12 | | #include "llvm/ADT/Hashing.h" |
13 | | #include "llvm/ADT/STLExtras.h" |
14 | | #include "llvm/ADT/StringExtras.h" |
15 | | #include "llvm/Support/FormatVariadic.h" |
16 | | #include "llvm/Support/raw_ostream.h" |
17 | | |
18 | | using ItemSet = std::vector<clang::pseudo::Item>; |
19 | | |
20 | | namespace llvm { |
21 | | // Support clang::pseudo::Item as DenseMap keys. |
22 | | template <> struct DenseMapInfo<ItemSet> { |
23 | 31.5k | static inline ItemSet getEmptyKey() { |
24 | 31.5k | return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()}; |
25 | 31.5k | } |
26 | 30.0k | static inline ItemSet getTombstoneKey() { |
27 | 30.0k | return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()}; |
28 | 30.0k | } |
29 | 30.0k | static unsigned getHashValue(const ItemSet &I) { |
30 | 30.0k | return llvm::hash_combine_range(I.begin(), I.end()); |
31 | 30.0k | } |
32 | 131k | static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) { |
33 | 131k | return LHS == RHS; |
34 | 131k | } |
35 | | }; |
36 | | } // namespace llvm |
37 | | |
38 | | namespace clang { |
39 | | namespace pseudo { |
40 | | namespace { |
41 | | |
42 | | struct SortByNextSymbol { |
43 | 1.47k | SortByNextSymbol(const Grammar &G) : G(G) {} |
44 | 478k | bool operator()(const Item &L, const Item &R) { |
45 | 478k | if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G)) |
46 | 427k | return L.next(G) < R.next(G); |
47 | 50.4k | if (L.hasNext() != R.hasNext()) |
48 | 610 | return L.hasNext() < R.hasNext(); // a trailing dot is minimal. |
49 | 49.8k | return L < R; |
50 | 50.4k | } |
51 | | const Grammar &G; |
52 | | }; |
53 | | |
54 | | // Computes a closure of the given item set S: |
55 | | // - extends the given S to contain all options for parsing next token; |
56 | | // - nonterminals after a dot are recursively expanded into the begin-state |
57 | | // of all production rules that produce that nonterminal; |
58 | | // |
59 | | // Given |
60 | | // Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ] |
61 | | // Input = [ E := . T ] |
62 | | // returns [ E := . T, T := . n, T := . ( E ) ] |
63 | 1.47k | State closure(ItemSet Queue, const Grammar &G) { |
64 | 1.47k | llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()}; |
65 | | // We reuse the passed-by-value Queue as the final result, as it's already |
66 | | // initialized to the right elements. |
67 | 1.47k | size_t ItIndex = 0; |
68 | 53.7k | while (ItIndex < Queue.size()) { |
69 | 52.2k | const Item &ExpandingItem = Queue[ItIndex]; |
70 | 52.2k | ++ItIndex; |
71 | 52.2k | if (!ExpandingItem.hasNext()) |
72 | 1.05k | continue; |
73 | | |
74 | 51.2k | SymbolID NextSym = ExpandingItem.next(G); |
75 | 51.2k | if (pseudo::isToken(NextSym)) |
76 | 24.2k | continue; |
77 | 26.9k | auto RRange = G.table().Nonterminals[NextSym].RuleRange; |
78 | 147k | for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { |
79 | 120k | Item NewItem = Item::start(RID, G); |
80 | 120k | if (InQueue.insert(NewItem).second) // new |
81 | 49.6k | Queue.push_back(std::move(NewItem)); |
82 | 120k | } |
83 | 26.9k | } |
84 | 1.47k | Queue.shrink_to_fit(); |
85 | 1.47k | llvm::sort(Queue, SortByNextSymbol(G)); |
86 | 1.47k | return {std::move(Queue)}; |
87 | 1.47k | } |
88 | | |
89 | | // Returns all next (with a dot advanced) kernel item sets, partitioned by the |
90 | | // advanced symbol. |
91 | | // |
92 | | // Given |
93 | | // S = [ E := . a b, E := E . - T ] |
94 | | // returns [ |
95 | | // {id(a), [ E := a . b ]}, |
96 | | // {id(-), [ E := E - . T ]} |
97 | | // ] |
98 | | std::vector<std::pair<SymbolID, ItemSet>> |
99 | 1.47k | nextAvailableKernelItems(const State &S, const Grammar &G) { |
100 | 1.47k | std::vector<std::pair<SymbolID, ItemSet>> Results; |
101 | 1.47k | llvm::ArrayRef<Item> AllItems = S.Items; |
102 | 1.83k | AllItems = AllItems.drop_while([](const Item &I) { return !I.hasNext(); }); |
103 | 28.5k | while (!AllItems.empty()) { |
104 | 27.1k | SymbolID AdvancedSymbol = AllItems.front().next(G); |
105 | 77.5k | auto Batch = AllItems.take_while([AdvancedSymbol, &G](const Item &I) { |
106 | 77.5k | assert(I.hasNext()); |
107 | 0 | return I.next(G) == AdvancedSymbol; |
108 | 77.5k | }); |
109 | 27.1k | assert(!Batch.empty()); |
110 | 0 | AllItems = AllItems.drop_front(Batch.size()); |
111 | | |
112 | | // Advance a dot over the Symbol. |
113 | 27.1k | ItemSet Next; |
114 | 27.1k | for (const Item &I : Batch) |
115 | 51.2k | Next.push_back(I.advance()); |
116 | | // sort the set to keep order determinism for hash computation. |
117 | 27.1k | llvm::sort(Next); |
118 | 27.1k | Results.push_back({AdvancedSymbol, std::move(Next)}); |
119 | 27.1k | } |
120 | 1.47k | return Results; |
121 | 1.47k | } |
122 | | |
123 | | std::vector<std::pair<ExtensionID, SymbolID>> |
124 | 1.47k | availableRecovery(const State &S, const Grammar &G) { |
125 | 1.47k | std::vector<std::pair<ExtensionID, SymbolID>> Result; |
126 | 52.2k | for (const Item &I : S.Items) { |
127 | 52.2k | const auto &Rule = G.lookupRule(I.rule()); |
128 | 52.2k | if (I.dot() != Rule.RecoveryIndex) |
129 | 52.2k | continue; |
130 | 27 | Result.push_back({Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]}); |
131 | 27 | } |
132 | 1.47k | llvm::sort(Result); |
133 | 1.47k | Result.erase(std::unique(Result.begin(), Result.end()), Result.end()); |
134 | 1.47k | return Result; |
135 | 1.47k | } |
136 | | |
137 | | } // namespace |
138 | | |
139 | 0 | std::string Item::dump(const Grammar &G) const { |
140 | 0 | const auto &Rule = G.lookupRule(RID); |
141 | 0 | auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) { |
142 | 0 | std::vector<llvm::StringRef> Results; |
143 | 0 | for (auto SID : Syms) |
144 | 0 | Results.push_back(G.symbolName(SID)); |
145 | 0 | return Results; |
146 | 0 | }; |
147 | 0 | return llvm::formatv("{0} := {1} • {2}{3}", G.symbolName(Rule.Target), |
148 | 0 | llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "), |
149 | 0 | llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "), |
150 | 0 | Rule.RecoveryIndex == DotPos ? " [recovery]" : "") |
151 | 0 | .str(); |
152 | 0 | } |
153 | | |
154 | 0 | std::string State::dump(const Grammar &G, unsigned Indent) const { |
155 | 0 | std::string Result; |
156 | 0 | llvm::raw_string_ostream OS(Result); |
157 | 0 | for (const auto &Item : Items) |
158 | 0 | OS.indent(Indent) << llvm::formatv("{0}\n", Item.dump(G)); |
159 | 0 | return OS.str(); |
160 | 0 | } |
161 | | |
162 | 0 | std::string LRGraph::dumpForTests(const Grammar &G) const { |
163 | 0 | std::string Result; |
164 | 0 | llvm::raw_string_ostream OS(Result); |
165 | 0 | OS << "States:\n"; |
166 | 0 | for (StateID ID = 0; ID < States.size(); ++ID) { |
167 | 0 | OS << llvm::formatv("State {0}\n", ID); |
168 | 0 | OS << States[ID].dump(G, /*Indent*/ 4); |
169 | 0 | } |
170 | 0 | for (const auto &E : Edges) { |
171 | 0 | OS << llvm::formatv("{0} ->[{1}] {2}\n", E.Src, G.symbolName(E.Label), |
172 | 0 | E.Dst); |
173 | 0 | } |
174 | 0 | return OS.str(); |
175 | 0 | } |
176 | | |
177 | 1 | LRGraph LRGraph::buildLR0(const Grammar &G) { |
178 | 1 | class Builder { |
179 | 1 | public: |
180 | 1 | Builder(const Grammar &G) : G(G) {} |
181 | | |
182 | | // Adds a given state if not existed. |
183 | 27.1k | std::pair<StateID, /*inserted*/ bool> insert(ItemSet KernelItems) { |
184 | 27.1k | assert(llvm::is_sorted(KernelItems) && |
185 | 27.1k | "Item must be sorted before inserting to a hash map!"); |
186 | 0 | auto It = StatesIndex.find(KernelItems); |
187 | 27.1k | if (It != StatesIndex.end()) |
188 | 25.6k | return {It->second, false}; |
189 | 1.47k | States.push_back(closure(KernelItems, G)); |
190 | 1.47k | StateID NextStateID = States.size() - 1; |
191 | 1.47k | StatesIndex.insert({std::move(KernelItems), NextStateID}); |
192 | 1.47k | return {NextStateID, true}; |
193 | 27.1k | } |
194 | | |
195 | 27.1k | void insertEdge(StateID Src, StateID Dst, SymbolID Label) { |
196 | 27.1k | Edges.push_back({Src, Dst, Label}); |
197 | 27.1k | } |
198 | | |
199 | 6 | void insertRecovery(StateID Src, ExtensionID Strategy, SymbolID Result) { |
200 | 6 | Recoveries.push_back({Src, Strategy, Result}); |
201 | 6 | } |
202 | | |
203 | | // Returns a state with the given id. |
204 | 2.95k | const State &find(StateID ID) const { |
205 | 2.95k | assert(ID < States.size()); |
206 | 0 | return States[ID]; |
207 | 2.95k | } |
208 | | |
209 | 3 | void addStartState(SymbolID Sym, StateID State) { |
210 | 3 | StartStates.push_back({Sym, State}); |
211 | 3 | } |
212 | | |
213 | 1 | LRGraph build() && { |
214 | 1 | States.shrink_to_fit(); |
215 | 1 | Edges.shrink_to_fit(); |
216 | 1 | Recoveries.shrink_to_fit(); |
217 | 1 | llvm::sort(StartStates); |
218 | 1 | StartStates.shrink_to_fit(); |
219 | 1 | return LRGraph(std::move(States), std::move(Edges), std::move(Recoveries), |
220 | 1 | std::move(StartStates)); |
221 | 1 | } |
222 | | |
223 | 1 | private: |
224 | | // Key is the **kernel** item sets. |
225 | 1 | llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex; |
226 | 1 | std::vector<State> States; |
227 | 1 | std::vector<Edge> Edges; |
228 | 1 | std::vector<Recovery> Recoveries; |
229 | 1 | const Grammar &G; |
230 | 1 | std::vector<std::pair<SymbolID, StateID>> StartStates; |
231 | 1 | } Builder(G); |
232 | | |
233 | 1 | std::vector<StateID> PendingStates; |
234 | | // Initialize states with the start symbol. |
235 | 1 | auto RRange = G.table().Nonterminals[G.underscore()].RuleRange; |
236 | 4 | for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { |
237 | 3 | auto StartState = std::vector<Item>{Item::start(RID, G)}; |
238 | 3 | auto Result = Builder.insert(std::move(StartState)); |
239 | 3 | assert(Result.second && "State must be new"); |
240 | 0 | PendingStates.push_back(Result.first); |
241 | | |
242 | 3 | const Rule &StartRule = G.lookupRule(RID); |
243 | 3 | assert(StartRule.Size == 2 && |
244 | 3 | StartRule.seq().back() == tokenSymbol(tok::eof) && |
245 | 3 | "Start rule must be of the form `_ := start-symbol EOF`!"); |
246 | 0 | Builder.addStartState(StartRule.seq().front(), Result.first); |
247 | 3 | } |
248 | | |
249 | 1.47k | while (!PendingStates.empty()) { |
250 | 1.47k | auto StateID = PendingStates.back(); |
251 | 1.47k | PendingStates.pop_back(); |
252 | 27.1k | for (auto Next : nextAvailableKernelItems(Builder.find(StateID), G)) { |
253 | 27.1k | auto Insert = Builder.insert(Next.second); |
254 | 27.1k | if (Insert.second) // new state, insert to the pending queue. |
255 | 1.47k | PendingStates.push_back(Insert.first); |
256 | 27.1k | Builder.insertEdge(StateID, Insert.first, Next.first); |
257 | 27.1k | } |
258 | 1.47k | for (auto Recovery : availableRecovery(Builder.find(StateID), G)) |
259 | 6 | Builder.insertRecovery(StateID, Recovery.first, Recovery.second); |
260 | 1.47k | } |
261 | 1 | return std::move(Builder).build(); |
262 | 1 | } |
263 | | |
264 | | } // namespace pseudo |
265 | | } // namespace clang |