// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
#include "src/isolate.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

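// Pick the best candidate from the ready list. Since AddNode() keeps the list
// sorted by decreasing total latency, the first node whose start cycle has
// been reached lies on the longest remaining dependency path, i.e. the
// critical path. Returns nullptr when no ready node can start this cycle.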
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

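// Stress-testing policy (used when FLAG_turbo_stress_instruction_scheduling
// is set): pick a random ready node, ignoring latencies and start cycles.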
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

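// A new node starts with no successors; its total latency and start cycle are
// left at -1 until ComputeTotalLatencies() and ScheduleBlock() fill them in.
// The per-instruction latency comes from the target-specific
// GetInstructionLatency().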
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

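// Adding an edge also bumps the successor's unscheduled-predecessor count,
// which keeps the successor out of the ready list until all of its
// predecessors have been scheduled.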
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {}

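// Scheduling works one basic block at a time; StartBlock() expects all
// per-block bookkeeping to have been reset by the previous EndBlock().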
void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

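// EndBlock() is where the block actually gets scheduled: the dependency graph
// built up by AddInstruction() and AddTerminator() is emitted through the
// chosen queue policy, then the per-block state is cleared for the next block.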
void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_or_trap_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

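// Add an instruction to the dependency graph, wiring up the edges that keep
// reordering safe: live-in register markers, the last deopt or trap point,
// ordering between side-effecting instructions and loads, and def-use
// dependencies through virtual registers.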
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effecting instructions,
      // but independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effecting instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

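// Schedule the instructions of the current block using the given ready-list
// policy. Note that a cycle can elapse without emitting anything: when every
// ready node still has its start cycle in the future, PopBestCandidate()
// returns nullptr and only the cycle counter advances.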
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}

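// Compute the scheduling flags for the architecture-independent opcodes;
// anything target-specific is delegated to GetTargetInstructionFlags().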
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchLookupSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The kArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchCallBuiltinPointer:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchDebugAbort:
    case kArchDebugBreak:
      return kHasSideEffect;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

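// The total latency of a node is its own latency plus the largest total
// latency among its successors, i.e. the length of the critical path from the
// node to the end of the block. Nodes are visited in reverse insertion order
// so that every successor has already been processed (successors are always
// added to graph_ after their predecessors).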
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8