Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- LRGraph.cpp - -------------------------------------------*- C++-*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang-pseudo/grammar/LRGraph.h"
10
#include "clang-pseudo/grammar/Grammar.h"
11
#include "llvm/ADT/DenseSet.h"
12
#include "llvm/ADT/Hashing.h"
13
#include "llvm/ADT/STLExtras.h"
14
#include "llvm/ADT/StringExtras.h"
15
#include "llvm/Support/FormatVariadic.h"
16
#include "llvm/Support/raw_ostream.h"
17
18
using ItemSet = std::vector<clang::pseudo::Item>;
19
20
namespace llvm {
21
// Support clang::pseudo::Item as DenseMap keys.
22
template <> struct DenseMapInfo<ItemSet> {
23
31.5k
  static inline ItemSet getEmptyKey() {
24
31.5k
    return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()};
25
31.5k
  }
26
30.0k
  static inline ItemSet getTombstoneKey() {
27
30.0k
    return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()};
28
30.0k
  }
29
30.0k
  static unsigned getHashValue(const ItemSet &I) {
30
30.0k
    return llvm::hash_combine_range(I.begin(), I.end());
31
30.0k
  }
32
131k
  static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) {
33
131k
    return LHS == RHS;
34
131k
  }
35
};
36
} // namespace llvm
37
38
namespace clang {
39
namespace pseudo {
40
namespace {
41
42
struct SortByNextSymbol {
43
1.47k
  SortByNextSymbol(const Grammar &G) : G(G) {}
44
478k
  bool operator()(const Item &L, const Item &R) {
45
478k
    if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G))
46
427k
      return L.next(G) < R.next(G);
47
50.4k
    if (L.hasNext() != R.hasNext())
48
610
      return L.hasNext() < R.hasNext(); //  a trailing dot is minimal.
49
49.8k
    return L < R;
50
50.4k
  }
51
  const Grammar &G;
52
};
53
54
// Computes a closure of the given item set S:
55
//  - extends the given S to contain all options for parsing next token;
56
//  - nonterminals after a dot are recursively expanded into the begin-state
57
//    of all production rules that produce that nonterminal;
58
//
59
// Given
60
//   Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ]
61
//   Input = [ E := . T ]
62
// returns [ E :=  . T, T := . n, T := . ( E ) ]
63
1.47k
State closure(ItemSet Queue, const Grammar &G) {
64
1.47k
  llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()};
65
  // We reuse the passed-by-value Queue as the final result, as it's already
66
  // initialized to the right elements.
67
1.47k
  size_t ItIndex = 0;
68
53.7k
  while (ItIndex < Queue.size()) {
69
52.2k
    const Item &ExpandingItem = Queue[ItIndex];
70
52.2k
    ++ItIndex;
71
52.2k
    if (!ExpandingItem.hasNext())
72
1.05k
      continue;
73
74
51.2k
    SymbolID NextSym = ExpandingItem.next(G);
75
51.2k
    if (pseudo::isToken(NextSym))
76
24.2k
      continue;
77
26.9k
    auto RRange = G.table().Nonterminals[NextSym].RuleRange;
78
147k
    for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
79
120k
      Item NewItem = Item::start(RID, G);
80
120k
      if (InQueue.insert(NewItem).second) // new
81
49.6k
        Queue.push_back(std::move(NewItem));
82
120k
    }
83
26.9k
  }
84
1.47k
  Queue.shrink_to_fit();
85
1.47k
  llvm::sort(Queue, SortByNextSymbol(G));
86
1.47k
  return {std::move(Queue)};
87
1.47k
}
88
89
// Returns all next (with a dot advanced) kernel item sets, partitioned by the
90
// advanced symbol.
91
//
92
// Given
93
//  S = [ E := . a b, E := E . - T ]
94
// returns [
95
//   {id(a), [ E := a . b ]},
96
//   {id(-), [ E := E - . T ]}
97
// ]
98
std::vector<std::pair<SymbolID, ItemSet>>
99
1.47k
nextAvailableKernelItems(const State &S, const Grammar &G) {
100
1.47k
  std::vector<std::pair<SymbolID, ItemSet>> Results;
101
1.47k
  llvm::ArrayRef<Item> AllItems = S.Items;
102
1.83k
  AllItems = AllItems.drop_while([](const Item &I) { return !I.hasNext(); });
103
28.5k
  while (!AllItems.empty()) {
104
27.1k
    SymbolID AdvancedSymbol = AllItems.front().next(G);
105
77.5k
    auto Batch = AllItems.take_while([AdvancedSymbol, &G](const Item &I) {
106
77.5k
      assert(I.hasNext());
107
0
      return I.next(G) == AdvancedSymbol;
108
77.5k
    });
109
27.1k
    assert(!Batch.empty());
110
0
    AllItems = AllItems.drop_front(Batch.size());
111
112
    // Advance a dot over the Symbol.
113
27.1k
    ItemSet Next;
114
27.1k
    for (const Item &I : Batch)
115
51.2k
      Next.push_back(I.advance());
116
    // sort the set to keep order determinism for hash computation.
117
27.1k
    llvm::sort(Next);
118
27.1k
    Results.push_back({AdvancedSymbol, std::move(Next)});
119
27.1k
  }
120
1.47k
  return Results;
121
1.47k
}
122
123
std::vector<std::pair<ExtensionID, SymbolID>>
124
1.47k
availableRecovery(const State &S, const Grammar &G) {
125
1.47k
  std::vector<std::pair<ExtensionID, SymbolID>> Result;
126
52.2k
  for (const Item &I : S.Items) {
127
52.2k
    const auto &Rule = G.lookupRule(I.rule());
128
52.2k
    if (I.dot() != Rule.RecoveryIndex)
129
52.2k
      continue;
130
27
    Result.push_back({Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]});
131
27
  }
132
1.47k
  llvm::sort(Result);
133
1.47k
  Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
134
1.47k
  return Result;
135
1.47k
}
136
137
} // namespace
138
139
0
std::string Item::dump(const Grammar &G) const {
140
0
  const auto &Rule = G.lookupRule(RID);
141
0
  auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) {
142
0
    std::vector<llvm::StringRef> Results;
143
0
    for (auto SID : Syms)
144
0
      Results.push_back(G.symbolName(SID));
145
0
    return Results;
146
0
  };
147
0
  return llvm::formatv("{0} := {1} • {2}{3}", G.symbolName(Rule.Target),
148
0
                       llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "),
149
0
                       llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "),
150
0
                       Rule.RecoveryIndex == DotPos ? " [recovery]" : "")
151
0
      .str();
152
0
}
153
154
0
std::string State::dump(const Grammar &G, unsigned Indent) const {
155
0
  std::string Result;
156
0
  llvm::raw_string_ostream OS(Result);
157
0
  for (const auto &Item : Items)
158
0
    OS.indent(Indent) << llvm::formatv("{0}\n", Item.dump(G));
159
0
  return OS.str();
160
0
}
161
162
0
std::string LRGraph::dumpForTests(const Grammar &G) const {
163
0
  std::string Result;
164
0
  llvm::raw_string_ostream OS(Result);
165
0
  OS << "States:\n";
166
0
  for (StateID ID = 0; ID < States.size(); ++ID) {
167
0
    OS << llvm::formatv("State {0}\n", ID);
168
0
    OS << States[ID].dump(G, /*Indent*/ 4);
169
0
  }
170
0
  for (const auto &E : Edges) {
171
0
    OS << llvm::formatv("{0} ->[{1}] {2}\n", E.Src, G.symbolName(E.Label),
172
0
                        E.Dst);
173
0
  }
174
0
  return OS.str();
175
0
}
176
177
1
LRGraph LRGraph::buildLR0(const Grammar &G) {
178
1
  class Builder {
179
1
  public:
180
1
    Builder(const Grammar &G) : G(G) {}
181
182
    // Adds a given state if not existed.
183
27.1k
    std::pair<StateID, /*inserted*/ bool> insert(ItemSet KernelItems) {
184
27.1k
      assert(llvm::is_sorted(KernelItems) &&
185
27.1k
             "Item must be sorted before inserting to a hash map!");
186
0
      auto It = StatesIndex.find(KernelItems);
187
27.1k
      if (It != StatesIndex.end())
188
25.6k
        return {It->second, false};
189
1.47k
      States.push_back(closure(KernelItems, G));
190
1.47k
      StateID NextStateID = States.size() - 1;
191
1.47k
      StatesIndex.insert({std::move(KernelItems), NextStateID});
192
1.47k
      return {NextStateID, true};
193
27.1k
    }
194
195
27.1k
    void insertEdge(StateID Src, StateID Dst, SymbolID Label) {
196
27.1k
      Edges.push_back({Src, Dst, Label});
197
27.1k
    }
198
199
6
    void insertRecovery(StateID Src, ExtensionID Strategy, SymbolID Result) {
200
6
      Recoveries.push_back({Src, Strategy, Result});
201
6
    }
202
203
    // Returns a state with the given id.
204
2.95k
    const State &find(StateID ID) const {
205
2.95k
      assert(ID < States.size());
206
0
      return States[ID];
207
2.95k
    }
208
209
3
    void addStartState(SymbolID Sym, StateID State) {
210
3
      StartStates.push_back({Sym, State});
211
3
    }
212
213
1
    LRGraph build() && {
214
1
      States.shrink_to_fit();
215
1
      Edges.shrink_to_fit();
216
1
      Recoveries.shrink_to_fit();
217
1
      llvm::sort(StartStates);
218
1
      StartStates.shrink_to_fit();
219
1
      return LRGraph(std::move(States), std::move(Edges), std::move(Recoveries),
220
1
                     std::move(StartStates));
221
1
    }
222
223
1
  private:
224
    // Key is the **kernel** item sets.
225
1
    llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex;
226
1
    std::vector<State> States;
227
1
    std::vector<Edge> Edges;
228
1
    std::vector<Recovery> Recoveries;
229
1
    const Grammar &G;
230
1
    std::vector<std::pair<SymbolID, StateID>> StartStates;
231
1
  } Builder(G);
232
233
1
  std::vector<StateID> PendingStates;
234
  // Initialize states with the start symbol.
235
1
  auto RRange = G.table().Nonterminals[G.underscore()].RuleRange;
236
4
  for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
237
3
    auto StartState = std::vector<Item>{Item::start(RID, G)};
238
3
    auto Result = Builder.insert(std::move(StartState));
239
3
    assert(Result.second && "State must be new");
240
0
    PendingStates.push_back(Result.first);
241
242
3
    const Rule &StartRule = G.lookupRule(RID);
243
3
    assert(StartRule.Size == 2 &&
244
3
           StartRule.seq().back() == tokenSymbol(tok::eof) &&
245
3
           "Start rule must be of the form `_ := start-symbol EOF`!");
246
0
    Builder.addStartState(StartRule.seq().front(), Result.first);
247
3
  }
248
249
1.47k
  while (!PendingStates.empty()) {
250
1.47k
    auto StateID = PendingStates.back();
251
1.47k
    PendingStates.pop_back();
252
27.1k
    for (auto Next : nextAvailableKernelItems(Builder.find(StateID), G)) {
253
27.1k
      auto Insert = Builder.insert(Next.second);
254
27.1k
      if (Insert.second) // new state, insert to the pending queue.
255
1.47k
        PendingStates.push_back(Insert.first);
256
27.1k
      Builder.insertEdge(StateID, Insert.first, Next.first);
257
27.1k
    }
258
1.47k
    for (auto Recovery : availableRecovery(Builder.find(StateID), G))
259
6
      Builder.insertRecovery(StateID, Recovery.first, Recovery.second);
260
1.47k
  }
261
1
  return std::move(Builder).build();
262
1
}
263
264
} // namespace pseudo
265
} // namespace clang