Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- Grammar.cpp - Grammar for clang pseudoparser  -----------*- C++-*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang-pseudo/grammar/Grammar.h"
10
#include "clang/Basic/TokenKinds.h"
11
#include "llvm/ADT/ArrayRef.h"
12
#include "llvm/ADT/STLExtras.h"
13
#include "llvm/ADT/StringRef.h"
14
#include "llvm/Support/FormatVariadic.h"
15
#include "llvm/Support/raw_ostream.h"
16
#include <optional>
17
18
namespace clang {
19
namespace pseudo {
20
21
Rule::Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Sequence)
22
884
    : Target(Target), Size(static_cast<uint8_t>(Sequence.size())) {
23
884
  assert(Sequence.size() <= Rule::MaxElements);
24
0
  llvm::copy(Sequence, this->Sequence);
25
884
}
26
27
1
Grammar::Grammar(std::unique_ptr<GrammarTable> Table) : T(std::move(Table)) {
28
1
  Underscore = *findNonterminal("_");
29
1
}
30
31
0
llvm::ArrayRef<Rule> Grammar::rulesFor(SymbolID SID) const {
32
0
  assert(isNonterminal(SID));
33
0
  const auto &R = T->Nonterminals[SID].RuleRange;
34
0
  assert(R.End <= T->Rules.size());
35
0
  return llvm::ArrayRef(&T->Rules[R.Start], R.End - R.Start);
36
0
}
37
38
65.6M
const Rule &Grammar::lookupRule(RuleID RID) const {
39
65.6M
  assert(RID < T->Rules.size());
40
0
  return T->Rules[RID];
41
65.6M
}
42
43
0
llvm::StringRef Grammar::symbolName(SymbolID SID) const {
44
0
  if (isToken(SID))
45
0
    return T->Terminals[symbolToToken(SID)];
46
0
  return T->Nonterminals[SID].Name;
47
0
}
48
49
9.56k
std::optional<SymbolID> Grammar::findNonterminal(llvm::StringRef Name) const {
50
9.56k
  auto It = llvm::partition_point(
51
9.56k
      T->Nonterminals,
52
76.5k
      [&](const GrammarTable::Nonterminal &X) { return X.Name < Name; });
53
9.56k
  if (It != T->Nonterminals.end() && It->Name == Name)
54
9.56k
    return It - T->Nonterminals.begin();
55
0
  return std::nullopt;
56
9.56k
}
57
58
0
std::string Grammar::dumpRule(RuleID RID) const {
59
0
  std::string Result;
60
0
  llvm::raw_string_ostream OS(Result);
61
0
  const Rule &R = T->Rules[RID];
62
0
  OS << symbolName(R.Target) << " :=";
63
0
  for (unsigned I = 0; I < R.Size; ++I) {
64
0
    OS << " " << symbolName(R.Sequence[I]);
65
0
    if (R.RecoveryIndex == I)
66
0
      OS << " [recover=" << T->AttributeValues[R.Recovery] << "]";
67
0
  }
68
0
  if (R.Guarded)
69
0
    OS << " [guard]";
70
0
  return Result;
71
0
}
72
73
0
std::string Grammar::dumpRules(SymbolID SID) const {
74
0
  assert(isNonterminal(SID));
75
0
  std::string Result;
76
0
  const auto &Range = T->Nonterminals[SID].RuleRange;
77
0
  for (RuleID RID = Range.Start; RID < Range.End; ++RID)
78
0
    Result.append(dumpRule(RID)).push_back('\n');
79
0
  return Result;
80
0
}
81
82
0
std::string Grammar::dump() const {
83
0
  std::string Result;
84
0
  llvm::raw_string_ostream OS(Result);
85
0
  OS << "Nonterminals:\n";
86
0
  for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID)
87
0
    OS << llvm::formatv("  {0} {1}\n", SID, symbolName(SID));
88
0
  OS << "Rules:\n";
89
0
  for (RuleID RID = 0; RID < T->Rules.size(); ++RID)
90
0
    OS << llvm::formatv("  {0} {1}\n", RID, dumpRule(RID));
91
0
  return OS.str();
92
0
}
93
94
2
std::vector<llvm::DenseSet<SymbolID>> firstSets(const Grammar &G) {
95
2
  std::vector<llvm::DenseSet<SymbolID>> FirstSets(
96
2
      G.table().Nonterminals.size());
97
7.07k
  auto ExpandFirstSet = [&FirstSets](SymbolID Target, SymbolID First) {
98
7.07k
    assert(isNonterminal(Target));
99
7.07k
    if (isToken(First))
100
3.37k
      return FirstSets[Target].insert(First).second;
101
3.69k
    bool Changed = false;
102
3.69k
    for (SymbolID SID : FirstSets[First])
103
57.9k
      Changed |= FirstSets[Target].insert(SID).second;
104
3.69k
    return Changed;
105
7.07k
  };
106
107
  // A rule S := T ... implies elements in FIRST(S):
108
  //  - if T is a terminal, FIRST(S) contains T
109
  //  - if T is a nonterminal, FIRST(S) contains FIRST(T)
110
  // Since FIRST(T) may not have been fully computed yet, FIRST(S) itself may
111
  // end up being incomplete.
112
  // We iterate until we hit a fixed point.
113
  // (This isn't particularly efficient, but table building isn't on the
114
  // critical path).
115
2
  bool Changed = true;
116
10
  while (Changed) {
117
8
    Changed = false;
118
8
    for (const auto &R : G.table().Rules)
119
      // We only need to consider the first element because symbols are
120
      // non-nullable.
121
7.07k
      Changed |= ExpandFirstSet(R.Target, R.seq().front());
122
8
  }
123
2
  return FirstSets;
124
2
}
125
126
2
std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &G) {
127
2
  auto FirstSets = firstSets(G);
128
2
  std::vector<llvm::DenseSet<SymbolID>> FollowSets(
129
2
      G.table().Nonterminals.size());
130
  // Expand the follow set of a nonterminal symbol Y by adding all from the
131
  // given symbol set.
132
2
  auto ExpandFollowSet = [&FollowSets](SymbolID Y,
133
46.6k
                                       const llvm::DenseSet<SymbolID> &ToAdd) {
134
46.6k
    assert(isNonterminal(Y));
135
0
    bool Changed = false;
136
46.6k
    for (SymbolID F : ToAdd)
137
821k
      Changed |= FollowSets[Y].insert(F).second;
138
46.6k
    return Changed;
139
46.6k
  };
140
  // Follow sets is computed based on the following 3 rules, the computation
141
  // is completed at a fixed point where there is no more new symbols can be
142
  // added to any of the follow sets.
143
  //
144
  // Rule 1: add endmarker to the FOLLOW(S), where S is the start symbol of the
145
  // augmented grammar, in our case it is '_'.
146
2
  FollowSets[G.underscore()].insert(tokenSymbol(tok::eof));
147
2
  bool Changed = true;
148
46
  while (Changed) {
149
44
    Changed = false;
150
38.8k
    for (const auto &R : G.table().Rules) {
151
      // Rule 2: for a rule X := ... Y Z, we add all symbols from FIRST(Z) to
152
      // FOLLOW(Y).
153
90.6k
      for (size_t I = 0; I + 1 < R.seq().size(); ++I) {
154
51.7k
        if (isToken(R.seq()[I]))
155
26.5k
          continue;
156
        // We only need to consider the next symbol because symbols are
157
        // non-nullable.
158
25.2k
        SymbolID Next = R.seq()[I + 1];
159
25.2k
        if (isToken(Next))
160
          // First set for a terminal is itself.
161
15.3k
          Changed |= ExpandFollowSet(R.seq()[I], {Next});
162
9.85k
        else
163
9.85k
          Changed |= ExpandFollowSet(R.seq()[I], FirstSets[Next]);
164
25.2k
      }
165
      // Rule 3: for a rule X := ... Z, we add all symbols from FOLLOW(X) to
166
      // FOLLOW(Z).
167
38.8k
      SymbolID Z = R.seq().back();
168
38.8k
      if (isNonterminal(Z))
169
21.4k
        Changed |= ExpandFollowSet(Z, FollowSets[R.Target]);
170
38.8k
    }
171
44
  }
172
2
  return FollowSets;
173
2
}
174
175
1
static llvm::ArrayRef<std::string> getTerminalNames() {
176
1
  static const auto &TerminalNames = []() {
177
1
    auto TerminalNames = new std::string[NumTerminals];
178
57
#define PUNCTUATOR(Tok, Spelling) TerminalNames[tok::Tok] = Spelling;
179
1
#define KEYWORD(Keyword, Condition)                                            \
180
323
  TerminalNames[tok::kw_##Keyword] = llvm::StringRef(#Keyword).upper();
181
63
#define TOK(Tok) TerminalNames[tok::Tok] = llvm::StringRef(#Tok).upper();
182
1
#include "clang/Basic/TokenKinds.def"
183
1
    return llvm::ArrayRef(TerminalNames, NumTerminals);
184
1
  }();
185
1
  return TerminalNames;
186
1
}
187
1
GrammarTable::GrammarTable() : Terminals(getTerminalNames()) {}
188
189
} // namespace pseudo
190
} // namespace clang