Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang-tools-extra/pseudo/lib/Forest.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- Forest.cpp - Parse forest  ------------------------------*- C++-*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang-pseudo/Forest.h"
10
#include "clang-pseudo/Token.h"
11
#include "llvm/ADT/ArrayRef.h"
12
#include "llvm/ADT/STLExtras.h"
13
#include "llvm/Support/FormatVariadic.h"
14
#include <optional>
15
16
namespace clang {
17
namespace pseudo {
18
19
0
void ForestNode::RecursiveIterator::operator++() {
20
0
  auto C = Cur->children();
21
  // Try to find a child of the current node to descend into.
22
0
  for (unsigned I = 0; I < C.size(); ++I) {
23
0
    if (Seen.insert(C[I]).second) {
24
0
      Stack.push_back({Cur, I});
25
0
      Cur = C[I];
26
0
      return;
27
0
    }
28
0
  }
29
  // Try to find a sibling af an ancestor to advance to.
30
0
  for (; !Stack.empty(); Stack.pop_back()) {
31
0
    C = Stack.back().Parent->children();
32
0
    unsigned &Index = Stack.back().ChildIndex;
33
0
    while (++Index < C.size()) {
34
0
      if (Seen.insert(C[Index]).second) {
35
0
        Cur = C[Index];
36
0
        return;
37
0
      }
38
0
    }
39
0
  }
40
0
  Cur = nullptr;
41
0
}
42
43
llvm::iterator_range<ForestNode::RecursiveIterator>
44
0
ForestNode::descendants() const {
45
0
  return {RecursiveIterator(this), RecursiveIterator()};
46
0
}
47
48
0
std::string ForestNode::dump(const Grammar &G) const {
49
0
  switch (kind()) {
50
0
  case Ambiguous:
51
0
    return llvm::formatv("{0} := <ambiguous>", G.symbolName(symbol()));
52
0
  case Terminal:
53
0
    return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()),
54
0
                         startTokenIndex());
55
0
  case Sequence:
56
0
    return G.dumpRule(rule());
57
0
  case Opaque:
58
0
    return llvm::formatv("{0} := <opaque>", G.symbolName(symbol()));
59
0
  }
60
0
  llvm_unreachable("Unhandled node kind!");
61
0
}
62
63
std::string ForestNode::dumpRecursive(const Grammar &G,
64
0
                                      bool Abbreviated) const {
65
0
  using llvm::formatv;
66
0
  Token::Index MaxToken = 0;
67
  // Count visits of nodes so we can mark those seen multiple times.
68
0
  llvm::DenseMap<const ForestNode *, /*VisitCount*/ unsigned> VisitCounts;
69
0
  std::function<void(const ForestNode *)> CountVisits =
70
0
      [&](const ForestNode *P) {
71
0
        MaxToken = std::max(MaxToken, P->startTokenIndex());
72
0
        if (VisitCounts[P]++ > 0)
73
0
          return; // Don't count children as multiply visited.
74
0
        if (P->kind() == Ambiguous)
75
0
          llvm::for_each(P->alternatives(), CountVisits);
76
0
        else if (P->kind() == Sequence)
77
0
          llvm::for_each(P->elements(), CountVisits);
78
0
      };
79
0
  CountVisits(this);
80
81
0
  unsigned IndexWidth = std::max(3, (int)std::to_string(MaxToken).size());
82
  // e.g. "[{0,4}, {1,4})" if MaxToken is 5742.
83
0
  std::string RangeFormat = formatv("[{{0,{0}}, {{1,{0}}) ", IndexWidth);
84
85
  // The box-drawing characters that should be added as a child is rendered.
86
0
  struct LineDecoration {
87
0
    std::string Prefix;         // Prepended to every line.
88
0
    llvm::StringRef First;      // added to the child's line.
89
0
    llvm::StringRef Subsequent; // added to descendants' lines.
90
0
  };
91
92
  // We print a "#<id>" for nonterminal forest nodes that are being dumped
93
  // multiple times.
94
0
  llvm::DenseMap<const ForestNode *, size_t> ReferenceIds;
95
0
  std::string Result;
96
0
  constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
97
0
  std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>,
98
0
                     LineDecoration &LineDec)>
99
0
      Dump = [&](const ForestNode *P, Token::Index End,
100
0
                 std::optional<SymbolID> ElidedParent, LineDecoration LineDec) {
101
0
        bool SharedNode = VisitCounts.find(P)->getSecond() > 1;
102
0
        llvm::ArrayRef<const ForestNode *> Children;
103
0
        auto EndOfElement = [&](size_t ChildIndex) {
104
0
          return ChildIndex + 1 == Children.size()
105
0
                     ? End
106
0
                     : Children[ChildIndex + 1]->startTokenIndex();
107
0
        };
108
0
        if (P->kind() == Ambiguous) {
109
0
          Children = P->alternatives();
110
0
        } else if (P->kind() == Sequence) {
111
0
          Children = P->elements();
112
0
          if (Abbreviated) {
113
            // Abbreviate chains of trivial sequence nodes.
114
            //    A := B, B := C, C := D, D := X Y Z
115
            // becomes
116
            //    A~D := X Y Z
117
            //
118
            // We can't hide nodes that appear multiple times in the tree,
119
            // because we need to call out their identity with IDs.
120
0
            if (Children.size() == 1 && !SharedNode) {
121
0
              assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
122
0
                     EndOfElement(0) == End);
123
0
              return Dump(Children[0], End,
124
0
                          /*ElidedParent=*/ElidedParent.value_or(P->symbol()),
125
0
                          LineDec);
126
0
            }
127
0
          }
128
0
        }
129
130
0
        if (End == KEnd)
131
0
          Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), "end");
132
0
        else
133
0
          Result += formatv(RangeFormat.c_str(), P->startTokenIndex(), End);
134
0
        Result += LineDec.Prefix;
135
0
        Result += LineDec.First;
136
0
        if (ElidedParent) {
137
0
          Result += G.symbolName(*ElidedParent);
138
0
          Result += "~";
139
0
        }
140
141
0
        if (SharedNode && P->kind() != ForestNode::Terminal) {
142
0
          auto It = ReferenceIds.try_emplace(P, ReferenceIds.size() + 1);
143
0
          bool First = It.second;
144
0
          unsigned ID = It.first->second;
145
146
          // The first time, print as #1. Later, =#1.
147
0
          if (First) {
148
0
            Result += formatv("{0} #{1}", P->dump(G), ID);
149
0
          } else {
150
0
            Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID);
151
0
            Children = {}; // Don't walk the children again.
152
0
          }
153
0
        } else {
154
0
          Result.append(P->dump(G));
155
0
        }
156
0
        Result.push_back('\n');
157
158
0
        auto OldPrefixSize = LineDec.Prefix.size();
159
0
        LineDec.Prefix += LineDec.Subsequent;
160
0
        for (size_t I = 0; I < Children.size(); ++I) {
161
0
          if (I == Children.size() - 1) {
162
0
            LineDec.First = "└─";
163
0
            LineDec.Subsequent = "  ";
164
0
          } else {
165
0
            LineDec.First = "├─";
166
0
            LineDec.Subsequent = "│ ";
167
0
          }
168
0
          Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End,
169
0
               std::nullopt, LineDec);
170
0
        }
171
0
        LineDec.Prefix.resize(OldPrefixSize);
172
0
      };
173
0
  LineDecoration LineDec;
174
0
  Dump(this, KEnd, std::nullopt, LineDec);
175
0
  return Result;
176
0
}
177
178
llvm::ArrayRef<ForestNode>
179
9.56k
ForestArena::createTerminals(const TokenStream &Code) {
180
9.56k
  ForestNode *Terminals = Arena.Allocate<ForestNode>(Code.tokens().size() + 1);
181
9.56k
  size_t Index = 0;
182
53.1M
  for (const auto &T : Code.tokens()) {
183
53.1M
    new (&Terminals[Index])
184
53.1M
        ForestNode(ForestNode::Terminal, tokenSymbol(T.Kind),
185
53.1M
                   /*Start=*/Index, /*TerminalData*/ 0);
186
53.1M
    ++Index;
187
53.1M
  }
188
  // Include an `eof` terminal.
189
  // This is important to drive the final shift/recover/reduce loop.
190
9.56k
  new (&Terminals[Index])
191
9.56k
      ForestNode(ForestNode::Terminal, tokenSymbol(tok::eof),
192
9.56k
                 /*Start=*/Index, /*TerminalData*/ 0);
193
9.56k
  ++Index;
194
9.56k
  NodeCount = Index;
195
9.56k
  return llvm::ArrayRef(Terminals, Index);
196
9.56k
}
197
198
} // namespace pseudo
199
} // namespace clang