/src/llvm-project/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "PPCMachineScheduler.h" |
10 | | #include "MCTargetDesc/PPCMCTargetDesc.h" |
11 | | |
12 | | using namespace llvm; |
13 | | |
14 | | static cl::opt<bool> |
15 | | DisableAddiLoadHeuristic("disable-ppc-sched-addi-load", |
16 | | cl::desc("Disable scheduling addi instruction before" |
17 | | "load for ppc"), cl::Hidden); |
18 | | static cl::opt<bool> |
19 | | EnableAddiHeuristic("ppc-postra-bias-addi", |
20 | | cl::desc("Enable scheduling addi instruction as early" |
21 | | "as possible post ra"), |
22 | | cl::Hidden, cl::init(true)); |
23 | | |
24 | 0 | static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) { |
25 | 0 | return Cand.SU->getInstr()->getOpcode() == PPC::ADDI || |
26 | 0 | Cand.SU->getInstr()->getOpcode() == PPC::ADDI8; |
27 | 0 | } |
28 | | |
29 | | bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand, |
30 | | SchedCandidate &TryCand, |
31 | 0 | SchedBoundary &Zone) const { |
32 | 0 | if (DisableAddiLoadHeuristic) |
33 | 0 | return false; |
34 | | |
35 | 0 | SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand; |
36 | 0 | SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand; |
37 | 0 | if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) { |
38 | 0 | TryCand.Reason = Stall; |
39 | 0 | return true; |
40 | 0 | } |
41 | 0 | if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) { |
42 | 0 | TryCand.Reason = NoCand; |
43 | 0 | return true; |
44 | 0 | } |
45 | | |
46 | 0 | return false; |
47 | 0 | } |
48 | | |
// Compare TryCand against Cand for the pre-RA scheduler.  The first part
// replays GenericScheduler::tryCandidate's heuristic chain verbatim (the
// ordering of these checks is load-bearing — each heuristic only runs if
// every earlier one declined to decide).  Only when the generic chain picked
// nothing, or picked purely by original node order, is the PPC-specific
// ADDI-before-load bias consulted.  Returns true if TryCand is the better
// pick; TryCand.Reason records which heuristic decided.
//
// Zone may be null when comparing candidates from the Top and Bottom
// boundaries against each other; boundary-local heuristics are skipped then.
bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                         SchedCandidate &TryCand,
                                         SchedBoundary *Zone) const {
  // From GenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between
  // Top and Bottom boundary. Some properties are simply incomparable, in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Fall through to original instruction order.
    // Note: NodeOrder is recorded but we do NOT return here, so the PPC
    // heuristic below may still override a pure node-order decision.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
    }
  }

  // GenericScheduler::tryCandidate end

  // Add powerpc specific heuristic only when TryCand isn't selected or
  // selected as node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There are some benefits to schedule the ADDI before the load to hide the
  // latency, as RA may create a true dependency between the load and addi.
  if (SameBoundary) {
    if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
      return TryCand.Reason != NoCand;
  }

  return TryCand.Reason != NoCand;
}
164 | | |
165 | | bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, |
166 | 0 | SchedCandidate &TryCand) const { |
167 | 0 | if (!EnableAddiHeuristic) |
168 | 0 | return false; |
169 | | |
170 | 0 | if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) { |
171 | 0 | TryCand.Reason = Stall; |
172 | 0 | return true; |
173 | 0 | } |
174 | 0 | return false; |
175 | 0 | } |
176 | | |
// Compare TryCand against Cand for the post-RA (top-down) scheduler.  The
// first part replays PostGenericScheduler::tryCandidate's heuristic chain
// verbatim (check ordering is load-bearing); only when that chain selected
// nothing, or selected purely by node order, is the PPC early-ADDI bias
// consulted.  Returns true if TryCand is the better pick; TryCand.Reason
// records which heuristic decided.
bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand) {
  // From PostGenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return TryCand.Reason != NoCand;

  // Keep clustered nodes together.  Post-RA scheduling is top-down only, so
  // both candidates are compared against the successor-cluster node.
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return TryCand.Reason != NoCand;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources, TryCand, Cand,
                 ResourceDemand))
    return TryCand.Reason != NoCand;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    return TryCand.Reason != NoCand;
  }

  // Fall through to original instruction order.
  // Note: NodeOrder is recorded but we do NOT return here, so the PPC
  // heuristic below may still override a pure node-order decision.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;

  // PostGenericScheduler::tryCandidate end

  // Add powerpc post ra specific heuristic only when TryCand isn't selected or
  // selected as node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There are some benefits to schedule the ADDI as early as possible post ra
  // to avoid stalled by vector instructions which take up all the hw units.
  // And ADDI is usually used to post inc the loop indvar, which matters the
  // performance.
  if (biasAddiCandidate(Cand, TryCand))
    return TryCand.Reason != NoCand;

  return TryCand.Reason != NoCand;
}
231 | | |
// Hook called when the post-RA scheduler enters a basic block.  No PPC
// specific setup is needed yet; forwards to the generic implementation.
void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::enterMBB(MBB);
}
236 | | |
// Hook called when the post-RA scheduler leaves a basic block.  No PPC
// specific teardown is needed yet; forwards to the generic implementation.
void PPCPostRASchedStrategy::leaveMBB() {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::leaveMBB();
}
241 | | |
// Initialize the strategy for a new scheduling region.  No PPC specific
// state is set up yet; forwards to the generic implementation.
void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
  // Custom PPC PostRA specific initialization here.
  PostGenericScheduler::initialize(Dag);
}
246 | | |
// Pick the next SUnit to schedule; IsTopNode is set by the generic picker.
// The PPC heuristics act through tryCandidate(), so this simply forwards.
SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
  // Custom PPC PostRA specific scheduling here.
  return PostGenericScheduler::pickNode(IsTopNode);
}
251 | | |