/src/llvm-project/clang-tools-extra/pseudo/lib/Forest.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- Forest.cpp - Parse forest ------------------------------*- C++-*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "clang-pseudo/Forest.h" |
10 | | #include "clang-pseudo/Token.h" |
11 | | #include "llvm/ADT/ArrayRef.h" |
12 | | #include "llvm/ADT/STLExtras.h" |
13 | | #include "llvm/Support/FormatVariadic.h" |
14 | | #include <optional> |
15 | | |
16 | | namespace clang { |
17 | | namespace pseudo { |
18 | | |
19 | 0 | void ForestNode::RecursiveIterator::operator++() { |
20 | 0 | auto C = Cur->children(); |
21 | | // Try to find a child of the current node to descend into. |
22 | 0 | for (unsigned I = 0; I < C.size(); ++I) { |
23 | 0 | if (Seen.insert(C[I]).second) { |
24 | 0 | Stack.push_back({Cur, I}); |
25 | 0 | Cur = C[I]; |
26 | 0 | return; |
27 | 0 | } |
28 | 0 | } |
29 | | // Try to find a sibling af an ancestor to advance to. |
30 | 0 | for (; !Stack.empty(); Stack.pop_back()) { |
31 | 0 | C = Stack.back().Parent->children(); |
32 | 0 | unsigned &Index = Stack.back().ChildIndex; |
33 | 0 | while (++Index < C.size()) { |
34 | 0 | if (Seen.insert(C[Index]).second) { |
35 | 0 | Cur = C[Index]; |
36 | 0 | return; |
37 | 0 | } |
38 | 0 | } |
39 | 0 | } |
40 | 0 | Cur = nullptr; |
41 | 0 | } |
42 | | |
43 | | llvm::iterator_range<ForestNode::RecursiveIterator> |
44 | 0 | ForestNode::descendants() const { |
45 | 0 | return {RecursiveIterator(this), RecursiveIterator()}; |
46 | 0 | } |
47 | | |
48 | 0 | std::string ForestNode::dump(const Grammar &G) const { |
49 | 0 | switch (kind()) { |
50 | 0 | case Ambiguous: |
51 | 0 | return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol())); |
52 | 0 | case Terminal: |
53 | 0 | return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), |
54 | 0 | startTokenIndex()); |
55 | 0 | case Sequence: |
56 | 0 | return G.dumpRule(rule()); |
57 | 0 | case Opaque: |
58 | 0 | return llvm::formatv("{0} := <opaque>", G.symbolName(symbol())); |
59 | 0 | } |
60 | 0 | llvm_unreachable("Unhandled node kind!"); |
61 | 0 | } |
62 | | |
63 | | std::string ForestNode::dumpRecursive(const Grammar &G, |
64 | 0 | bool Abbreviated) const { |
65 | 0 | using llvm::formatv; |
66 | 0 | Token::Index MaxToken = 0; |
67 | | // Count visits of nodes so we can mark those seen multiple times. |
68 | 0 | llvm::DenseMap<const ForestNode *, /*VisitCount*/ unsigned> VisitCounts; |
69 | 0 | std::function<void(const ForestNode *)> CountVisits = |
70 | 0 | [&](const ForestNode *P) { |
71 | 0 | MaxToken = std::max(MaxToken, P->startTokenIndex()); |
72 | 0 | if (VisitCounts[P]++ > 0) |
73 | 0 | return; // Don't count children as multiply visited. |
74 | 0 | if (P->kind() == Ambiguous) |
75 | 0 | llvm::for_each(P->alternatives(), CountVisits); |
76 | 0 | else if (P->kind() == Sequence) |
77 | 0 | llvm::for_each(P->elements(), CountVisits); |
78 | 0 | }; |
79 | 0 | CountVisits(this); |
80 | |
|
81 | 0 | unsigned IndexWidth = std::max(3, (int)std::to_string(MaxToken).size()); |
82 | | // e.g. "[{0,4}, {1,4})" if MaxToken is 5742. |
83 | 0 | std::string RangeFormat = formatv("[{{0,{0}}, {{1,{0}}) ", IndexWidth); |
84 | | |
85 | | // The box-drawing characters that should be added as a child is rendered. |
86 | 0 | struct LineDecoration { |
87 | 0 | std::string Prefix; // Prepended to every line. |
88 | 0 | llvm::StringRef First; // added to the child's line. |
89 | 0 | llvm::StringRef Subsequent; // added to descendants' lines. |
90 | 0 | }; |
91 | | |
92 | | // We print a "#<id>" for nonterminal forest nodes that are being dumped |
93 | | // multiple times. |
94 | 0 | llvm::DenseMap<const ForestNode *, size_t> ReferenceIds; |
95 | 0 | std::string Result; |
96 | 0 | constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max(); |
97 | 0 | std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>, |
98 | 0 | LineDecoration &LineDec)> |
99 | 0 | Dump = [&](const ForestNode *P, Token::Index End, |
100 | 0 | std::optional<SymbolID> ElidedParent, LineDecoration LineDec) { |
101 | 0 | bool SharedNode = VisitCounts.find(P)->getSecond() > 1; |
102 | 0 | llvm::ArrayRef<const ForestNode *> Children; |
103 | 0 | auto EndOfElement = [&](size_t ChildIndex) { |
104 | 0 | return ChildIndex + 1 == Children.size() |
105 | 0 | ? End |
106 | 0 | : Children[ChildIndex + 1]->startTokenIndex(); |
107 | 0 | }; |
108 | 0 | if (P->kind() == Ambiguous) { |
109 | 0 | Children = P->alternatives(); |
110 | 0 | } else if (P->kind() == Sequence) { |
111 | 0 | Children = P->elements(); |
112 | 0 | if (Abbreviated) { |
113 | | // Abbreviate chains of trivial sequence nodes. |
114 | | // A := B, B := C, C := D, D := X Y Z |
115 | | // becomes |
116 | | // A~D := X Y Z |
117 | | // |
118 | | // We can't hide nodes that appear multiple times in the tree, |
119 | | // because we need to call out their identity with IDs. |
120 | 0 | if (Children.size() == 1 && !SharedNode) { |
121 | 0 | assert(Children[0]->startTokenIndex() == P->startTokenIndex() && |
122 | 0 | EndOfElement(0) == End); |
123 | 0 | return Dump(Children[0], End, |
124 | 0 | /*ElidedParent=*/ElidedParent.value_or(P->symbol()), |
125 | 0 | LineDec); |
126 | 0 | } |
127 | 0 | } |
128 | 0 | } |
129 | | |
130 | 0 | if (End == KEnd) |
131 | 0 | Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), "end"); |
132 | 0 | else |
133 | 0 | Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), End); |
134 | 0 | Result += LineDec.Prefix; |
135 | 0 | Result += LineDec.First; |
136 | 0 | if (ElidedParent) { |
137 | 0 | Result += G.symbolName(*ElidedParent); |
138 | 0 | Result += "~"; |
139 | 0 | } |
140 | |
|
141 | 0 | if (SharedNode && P->kind() != ForestNode::Terminal) { |
142 | 0 | auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1); |
143 | 0 | bool First = It.second; |
144 | 0 | unsigned ID = It.first->second; |
145 | | |
146 | | // The first time, print as #1. Later, =#1. |
147 | 0 | if (First) { |
148 | 0 | Result += formatv("{0} #{1}", P->dump(G), ID); |
149 | 0 | } else { |
150 | 0 | Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID); |
151 | 0 | Children = {}; // Don't walk the children again. |
152 | 0 | } |
153 | 0 | } else { |
154 | 0 | Result.append(P->dump(G)); |
155 | 0 | } |
156 | 0 | Result.push_back('\n'); |
157 | |
|
158 | 0 | auto OldPrefixSize = LineDec.Prefix.size(); |
159 | 0 | LineDec.Prefix += LineDec.Subsequent; |
160 | 0 | for (size_t I = 0; I < Children.size(); ++I) { |
161 | 0 | if (I == Children.size() - 1) { |
162 | 0 | LineDec.First = "└─"; |
163 | 0 | LineDec.Subsequent = " "; |
164 | 0 | } else { |
165 | 0 | LineDec.First = "├─"; |
166 | 0 | LineDec.Subsequent = "│ "; |
167 | 0 | } |
168 | 0 | Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End, |
169 | 0 | std::nullopt, LineDec); |
170 | 0 | } |
171 | 0 | LineDec.Prefix.resize(OldPrefixSize); |
172 | 0 | }; |
173 | 0 | LineDecoration LineDec; |
174 | 0 | Dump(this, KEnd, std::nullopt, LineDec); |
175 | 0 | return Result; |
176 | 0 | } |
177 | | |
178 | | llvm::ArrayRef<ForestNode> |
179 | 9.56k | ForestArena::createTerminals(const TokenStream &Code) { |
180 | 9.56k | ForestNode *Terminals = Arena.Allocate<ForestNode>(Code.tokens().size() + 1); |
181 | 9.56k | size_t Index = 0; |
182 | 53.1M | for (const auto &T : Code.tokens()) { |
183 | 53.1M | new (&Terminals[Index]) |
184 | 53.1M | ForestNode(ForestNode::Terminal, tokenSymbol(T.Kind), |
185 | 53.1M | /*Start=*/Index, /*TerminalData*/ 0); |
186 | 53.1M | ++Index; |
187 | 53.1M | } |
188 | | // Include an `eof` terminal. |
189 | | // This is important to drive the final shift/recover/reduce loop. |
190 | 9.56k | new (&Terminals[Index]) |
191 | 9.56k | ForestNode(ForestNode::Terminal, tokenSymbol(tok::eof), |
192 | 9.56k | /*Start=*/Index, /*TerminalData*/ 0); |
193 | 9.56k | ++Index; |
194 | 9.56k | NodeCount = Index; |
195 | 9.56k | return llvm::ArrayRef(Terminals, Index); |
196 | 9.56k | } |
197 | | |
198 | | } // namespace pseudo |
199 | | } // namespace clang |