/src/llvm-project/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- GCNMinRegStrategy.cpp ----------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file defines and implements the class GCNMinRegScheduler, which |
11 | | /// implements an experimental, simple scheduler whose main goal is to learn |
12 | | /// ways about consuming less possible registers for a region. |
13 | | /// |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "llvm/CodeGen/ScheduleDAG.h" |
17 | | using namespace llvm; |
18 | | |
19 | | #define DEBUG_TYPE "machine-scheduler" |
20 | | |
21 | | namespace { |
22 | | |
23 | | class GCNMinRegScheduler { |
24 | | struct Candidate : ilist_node<Candidate> { |
25 | | const SUnit *SU; |
26 | | int Priority; |
27 | | |
28 | | Candidate(const SUnit *SU_, int Priority_ = 0) |
29 | 0 | : SU(SU_), Priority(Priority_) {} |
30 | | }; |
31 | | |
32 | | SpecificBumpPtrAllocator<Candidate> Alloc; |
33 | | using Queue = simple_ilist<Candidate>; |
34 | | Queue RQ; // Ready queue |
35 | | |
36 | | std::vector<unsigned> NumPreds; |
37 | | |
38 | 0 | bool isScheduled(const SUnit *SU) const { |
39 | 0 | assert(!SU->isBoundaryNode()); |
40 | 0 | return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max(); |
41 | 0 | } |
42 | | |
43 | 0 | void setIsScheduled(const SUnit *SU) { |
44 | 0 | assert(!SU->isBoundaryNode()); |
45 | 0 | NumPreds[SU->NodeNum] = std::numeric_limits<unsigned>::max(); |
46 | 0 | } |
47 | | |
48 | 0 | unsigned getNumPreds(const SUnit *SU) const { |
49 | 0 | assert(!SU->isBoundaryNode()); |
50 | 0 | assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max()); |
51 | 0 | return NumPreds[SU->NodeNum]; |
52 | 0 | } |
53 | | |
54 | 0 | unsigned decNumPreds(const SUnit *SU) { |
55 | 0 | assert(!SU->isBoundaryNode()); |
56 | 0 | assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max()); |
57 | 0 | return --NumPreds[SU->NodeNum]; |
58 | 0 | } |
59 | | |
60 | | void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits); |
61 | | |
62 | | int getReadySuccessors(const SUnit *SU) const; |
63 | | int getNotReadySuccessors(const SUnit *SU) const; |
64 | | |
65 | | template <typename Calc> |
66 | | unsigned findMax(unsigned Num, Calc C); |
67 | | |
68 | | Candidate* pickCandidate(); |
69 | | |
70 | | void bumpPredsPriority(const SUnit *SchedSU, int Priority); |
71 | | void releaseSuccessors(const SUnit* SU, int Priority); |
72 | | |
73 | | public: |
74 | | std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots, |
75 | | const ScheduleDAG &DAG); |
76 | | }; |
77 | | |
78 | | } // end anonymous namespace |
79 | | |
80 | 0 | void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) { |
81 | 0 | NumPreds.resize(SUnits.size()); |
82 | 0 | for (unsigned I = 0; I < SUnits.size(); ++I) |
83 | 0 | NumPreds[I] = SUnits[I].NumPredsLeft; |
84 | 0 | } |
85 | | |
86 | 0 | int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const { |
87 | 0 | unsigned NumSchedSuccs = 0; |
88 | 0 | for (auto SDep : SU->Succs) { |
89 | 0 | bool wouldBeScheduled = true; |
90 | 0 | for (auto PDep : SDep.getSUnit()->Preds) { |
91 | 0 | auto PSU = PDep.getSUnit(); |
92 | 0 | assert(!PSU->isBoundaryNode()); |
93 | 0 | if (PSU != SU && !isScheduled(PSU)) { |
94 | 0 | wouldBeScheduled = false; |
95 | 0 | break; |
96 | 0 | } |
97 | 0 | } |
98 | 0 | NumSchedSuccs += wouldBeScheduled ? 1 : 0; |
99 | 0 | } |
100 | 0 | return NumSchedSuccs; |
101 | 0 | } |
102 | | |
103 | 0 | int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const { |
104 | 0 | return SU->Succs.size() - getReadySuccessors(SU); |
105 | 0 | } |
106 | | |
107 | | template <typename Calc> |
108 | 0 | unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) { |
109 | 0 | assert(!RQ.empty() && Num <= RQ.size()); |
110 | | |
111 | 0 | using T = decltype(C(*RQ.begin())) ; |
112 | |
|
113 | 0 | T Max = std::numeric_limits<T>::min(); |
114 | 0 | unsigned NumMax = 0; |
115 | 0 | for (auto I = RQ.begin(); Num; --Num) { |
116 | 0 | T Cur = C(*I); |
117 | 0 | if (Cur >= Max) { |
118 | 0 | if (Cur > Max) { |
119 | 0 | Max = Cur; |
120 | 0 | NumMax = 1; |
121 | 0 | } else |
122 | 0 | ++NumMax; |
123 | 0 | auto &Cand = *I++; |
124 | 0 | RQ.remove(Cand); |
125 | 0 | RQ.push_front(Cand); |
126 | 0 | continue; |
127 | 0 | } |
128 | 0 | ++I; |
129 | 0 | } |
130 | 0 | return NumMax; |
131 | 0 | } Unexecuted instantiation: GCNMinRegStrategy.cpp:unsigned int (anonymous namespace)::GCNMinRegScheduler::findMax<(anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_0>(unsigned int, (anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_0) Unexecuted instantiation: GCNMinRegStrategy.cpp:unsigned int (anonymous namespace)::GCNMinRegScheduler::findMax<(anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_1>(unsigned int, (anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_1) Unexecuted instantiation: GCNMinRegStrategy.cpp:unsigned int (anonymous namespace)::GCNMinRegScheduler::findMax<(anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_2>(unsigned int, (anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_2) Unexecuted instantiation: GCNMinRegStrategy.cpp:unsigned int (anonymous namespace)::GCNMinRegScheduler::findMax<(anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_3>(unsigned int, (anonymous namespace)::GCNMinRegScheduler::pickCandidate()::$_3) |
132 | | |
133 | 0 | GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() { |
134 | 0 | do { |
135 | 0 | unsigned Num = RQ.size(); |
136 | 0 | if (Num == 1) break; |
137 | | |
138 | 0 | LLVM_DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num |
139 | 0 | << '\n'); |
140 | 0 | Num = findMax(Num, [=](const Candidate &C) { return C.Priority; }); |
141 | 0 | if (Num == 1) break; |
142 | | |
143 | 0 | LLVM_DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among " |
144 | 0 | << Num << '\n'); |
145 | 0 | Num = findMax(Num, [=](const Candidate &C) { |
146 | 0 | auto SU = C.SU; |
147 | 0 | int Res = getNotReadySuccessors(SU); |
148 | 0 | LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready " |
149 | 0 | << Res << " successors, metric = " << -Res << '\n'); |
150 | 0 | return -Res; |
151 | 0 | }); |
152 | 0 | if (Num == 1) break; |
153 | | |
154 | 0 | LLVM_DEBUG(dbgs() << "\nSelecting most producing candidate among " << Num |
155 | 0 | << '\n'); |
156 | 0 | Num = findMax(Num, [=](const Candidate &C) { |
157 | 0 | auto SU = C.SU; |
158 | 0 | auto Res = getReadySuccessors(SU); |
159 | 0 | LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " << Res |
160 | 0 | << " successors, metric = " << Res << '\n'); |
161 | 0 | return Res; |
162 | 0 | }); |
163 | 0 | if (Num == 1) break; |
164 | | |
165 | 0 | Num = Num ? Num : RQ.size(); |
166 | 0 | LLVM_DEBUG( |
167 | 0 | dbgs() |
168 | 0 | << "\nCan't find best candidate, selecting in program order among " |
169 | 0 | << Num << '\n'); |
170 | 0 | Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; }); |
171 | 0 | assert(Num == 1); |
172 | 0 | } while (false); |
173 | | |
174 | 0 | return &RQ.front(); |
175 | 0 | } |
176 | | |
177 | 0 | void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) { |
178 | 0 | SmallPtrSet<const SUnit*, 32> Set; |
179 | 0 | for (const auto &S : SchedSU->Succs) { |
180 | 0 | if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) || |
181 | 0 | S.getKind() != SDep::Data) |
182 | 0 | continue; |
183 | 0 | for (const auto &P : S.getSUnit()->Preds) { |
184 | 0 | auto PSU = P.getSUnit(); |
185 | 0 | assert(!PSU->isBoundaryNode()); |
186 | 0 | if (PSU != SchedSU && !isScheduled(PSU)) { |
187 | 0 | Set.insert(PSU); |
188 | 0 | } |
189 | 0 | } |
190 | 0 | } |
191 | 0 | SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end()); |
192 | 0 | while (!Worklist.empty()) { |
193 | 0 | auto SU = Worklist.pop_back_val(); |
194 | 0 | assert(!SU->isBoundaryNode()); |
195 | 0 | for (const auto &P : SU->Preds) { |
196 | 0 | if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) && |
197 | 0 | Set.insert(P.getSUnit()).second) |
198 | 0 | Worklist.push_back(P.getSUnit()); |
199 | 0 | } |
200 | 0 | } |
201 | 0 | LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum |
202 | 0 | << ")'s non-ready successors of " << Priority |
203 | 0 | << " priority in ready queue: "); |
204 | 0 | for (auto &C : RQ) { |
205 | 0 | if (Set.count(C.SU)) { |
206 | 0 | C.Priority = Priority; |
207 | 0 | LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')'); |
208 | 0 | } |
209 | 0 | } |
210 | 0 | LLVM_DEBUG(dbgs() << '\n'); |
211 | 0 | } |
212 | | |
213 | 0 | void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) { |
214 | 0 | for (const auto &S : SU->Succs) { |
215 | 0 | auto SuccSU = S.getSUnit(); |
216 | 0 | if (S.isWeak()) |
217 | 0 | continue; |
218 | 0 | assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0); |
219 | 0 | if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0) |
220 | 0 | RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority)); |
221 | 0 | } |
222 | 0 | } |
223 | | |
224 | | std::vector<const SUnit*> |
225 | | GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots, |
226 | 0 | const ScheduleDAG &DAG) { |
227 | 0 | const auto &SUnits = DAG.SUnits; |
228 | 0 | std::vector<const SUnit*> Schedule; |
229 | 0 | Schedule.reserve(SUnits.size()); |
230 | |
|
231 | 0 | initNumPreds(SUnits); |
232 | |
|
233 | 0 | int StepNo = 0; |
234 | |
|
235 | 0 | for (const auto *SU : TopRoots) { |
236 | 0 | RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo)); |
237 | 0 | } |
238 | 0 | releaseSuccessors(&DAG.EntrySU, StepNo); |
239 | |
|
240 | 0 | while (!RQ.empty()) { |
241 | 0 | LLVM_DEBUG(dbgs() << "\n=== Picking candidate, Step = " << StepNo |
242 | 0 | << "\n" |
243 | 0 | "Ready queue:"; |
244 | 0 | for (auto &C |
245 | 0 | : RQ) dbgs() |
246 | 0 | << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')'; |
247 | 0 | dbgs() << '\n';); |
248 | |
|
249 | 0 | auto C = pickCandidate(); |
250 | 0 | assert(C); |
251 | 0 | RQ.remove(*C); |
252 | 0 | auto SU = C->SU; |
253 | 0 | LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU)); |
254 | |
|
255 | 0 | releaseSuccessors(SU, StepNo); |
256 | 0 | Schedule.push_back(SU); |
257 | 0 | setIsScheduled(SU); |
258 | |
|
259 | 0 | if (getReadySuccessors(SU) == 0) |
260 | 0 | bumpPredsPriority(SU, StepNo); |
261 | |
|
262 | 0 | ++StepNo; |
263 | 0 | } |
264 | 0 | assert(SUnits.size() == Schedule.size()); |
265 | | |
266 | 0 | return Schedule; |
267 | 0 | } |
268 | | |
269 | | namespace llvm { |
270 | | |
271 | | std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots, |
272 | 0 | const ScheduleDAG &DAG) { |
273 | 0 | GCNMinRegScheduler S; |
274 | 0 | return S.schedule(TopRoots, DAG); |
275 | 0 | } |
276 | | |
277 | | } // end namespace llvm |