Line data Source code
1 : // Copyright 2015 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
#include "src/compiler/instruction-scheduler.h"

#include <algorithm>
#include <iterator>

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
9 :
10 : namespace v8 {
11 : namespace internal {
12 : namespace compiler {
13 :
14 0 : void InstructionScheduler::SchedulingQueueBase::AddNode(
15 : ScheduleGraphNode* node) {
16 : // We keep the ready list sorted by total latency so that we can quickly find
17 : // the next best candidate to schedule.
18 0 : auto it = nodes_.begin();
19 0 : while ((it != nodes_.end()) &&
20 0 : ((*it)->total_latency() >= node->total_latency())) {
21 : ++it;
22 : }
23 0 : nodes_.insert(it, node);
24 0 : }
25 :
26 :
27 : InstructionScheduler::ScheduleGraphNode*
28 0 : InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
29 : DCHECK(!IsEmpty());
30 0 : auto candidate = nodes_.end();
31 0 : for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
32 : // We only consider instructions that have all their operands ready.
33 0 : if (cycle >= (*iterator)->start_cycle()) {
34 : candidate = iterator;
35 : break;
36 : }
37 : }
38 :
39 0 : if (candidate != nodes_.end()) {
40 0 : ScheduleGraphNode *result = *candidate;
41 : nodes_.erase(candidate);
42 0 : return result;
43 : }
44 :
45 : return nullptr;
46 : }
47 :
48 :
49 : InstructionScheduler::ScheduleGraphNode*
50 0 : InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
51 : DCHECK(!IsEmpty());
52 : // Choose a random element from the ready list.
53 0 : auto candidate = nodes_.begin();
54 : std::advance(candidate, isolate()->random_number_generator()->NextInt(
55 0 : static_cast<int>(nodes_.size())));
56 0 : ScheduleGraphNode *result = *candidate;
57 : nodes_.erase(candidate);
58 0 : return result;
59 : }
60 :
61 :
62 0 : InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
63 : Zone* zone,
64 : Instruction* instr)
65 : : instr_(instr),
66 : successors_(zone),
67 : unscheduled_predecessors_count_(0),
68 0 : latency_(GetInstructionLatency(instr)),
69 : total_latency_(-1),
70 0 : start_cycle_(-1) {
71 0 : }
72 :
73 :
74 0 : void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
75 : ScheduleGraphNode* node) {
76 0 : successors_.push_back(node);
77 0 : node->unscheduled_predecessors_count_++;
78 0 : }
79 :
80 0 : InstructionScheduler::InstructionScheduler(Zone* zone,
81 : InstructionSequence* sequence)
82 : : zone_(zone),
83 : sequence_(sequence),
84 : graph_(zone),
85 : last_side_effect_instr_(nullptr),
86 : pending_loads_(zone),
87 : last_live_in_reg_marker_(nullptr),
88 : last_deopt_or_trap_(nullptr),
89 0 : operands_map_(zone) {}
90 :
91 0 : void InstructionScheduler::StartBlock(RpoNumber rpo) {
92 : DCHECK(graph_.empty());
93 : DCHECK_NULL(last_side_effect_instr_);
94 : DCHECK(pending_loads_.empty());
95 : DCHECK_NULL(last_live_in_reg_marker_);
96 : DCHECK_NULL(last_deopt_or_trap_);
97 : DCHECK(operands_map_.empty());
98 0 : sequence()->StartBlock(rpo);
99 0 : }
100 :
101 :
102 0 : void InstructionScheduler::EndBlock(RpoNumber rpo) {
103 0 : if (FLAG_turbo_stress_instruction_scheduling) {
104 0 : ScheduleBlock<StressSchedulerQueue>();
105 : } else {
106 0 : ScheduleBlock<CriticalPathFirstQueue>();
107 : }
108 0 : sequence()->EndBlock(rpo);
109 : graph_.clear();
110 0 : last_side_effect_instr_ = nullptr;
111 : pending_loads_.clear();
112 0 : last_live_in_reg_marker_ = nullptr;
113 0 : last_deopt_or_trap_ = nullptr;
114 : operands_map_.clear();
115 0 : }
116 :
117 :
// Adds |instr| to the dependency graph for the current block, wiring in every
// ordering constraint it must obey: block-terminator placement, live-in
// register markers, deopt/trap points, side effects vs. loads, and
// virtual-register def/use dependencies. The order in which the marker
// members (last_side_effect_instr_, pending_loads_, last_deopt_or_trap_,
// operands_map_) are read and then updated below is significant — each new
// node must depend on the *previous* markers before possibly becoming the
// new marker itself.
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Make sure that basic block terminators are not moved by adding them
    // as successor of every instruction.
    for (ScheduleGraphNode* node : graph_) {
      node->AddSuccessor(new_node);
    }
  } else if (IsFixedRegisterParameter(instr)) {
    // Fixed-register parameters are kept in their original order by chaining
    // each one after the previous marker.
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    // Ordinary instruction: it must not be hoisted above the live-in
    // register markers.
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      // All loads issued since the previous side effect must complete before
      // this one; afterwards the pending set restarts empty.
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side effects instructions but
      // independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies: this instruction must run after the
    // instruction that defined each virtual register it reads.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}
197 :
198 :
199 : template <typename QueueType>
200 0 : void InstructionScheduler::ScheduleBlock() {
201 : QueueType ready_list(this);
202 :
203 : // Compute total latencies so that we can schedule the critical path first.
204 0 : ComputeTotalLatencies();
205 :
206 : // Add nodes which don't have dependencies to the ready list.
207 0 : for (ScheduleGraphNode* node : graph_) {
208 0 : if (!node->HasUnscheduledPredecessor()) {
209 0 : ready_list.AddNode(node);
210 : }
211 : }
212 :
213 : // Go through the ready list and schedule the instructions.
214 : int cycle = 0;
215 0 : while (!ready_list.IsEmpty()) {
216 0 : ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);
217 :
218 0 : if (candidate != nullptr) {
219 0 : sequence()->AddInstruction(candidate->instruction());
220 :
221 0 : for (ScheduleGraphNode* successor : candidate->successors()) {
222 : successor->DropUnscheduledPredecessor();
223 : successor->set_start_cycle(
224 : std::max(successor->start_cycle(),
225 0 : cycle + candidate->latency()));
226 :
227 0 : if (!successor->HasUnscheduledPredecessor()) {
228 0 : ready_list.AddNode(successor);
229 : }
230 : }
231 : }
232 :
233 0 : cycle++;
234 : }
235 0 : }
236 :
237 :
// Classifies |instr| for the dependency builder in AddInstruction: whether it
// is a block terminator, has side effects, or is a load operation (kIsLoad /
// kHasSideEffect drive the memory-ordering edges). Architecture-specific
// opcodes are delegated to GetTargetInstructionFlags at the end.
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    // Pure/bookkeeping opcodes: freely reorderable.
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
#ifdef V8_TARGET_ARCH_ARM64
      // This is an unfortunate effect of arm64 dual stack pointers:
      //  * TruncateDoubleToI may call a stub, and the stub will push and pop
      //  values onto the stack. Push updates both CSP and JSSP but pop only
      //  restores JSSP.
      //  * kIeee754XXX opcodes call a C Function and the call macro may update
      //  CSP to meet alignment requirements but it will not bring back CSP to
      //  its original value.
      // Those opcode cannot be reordered with instructions with side effects
      // such as Arm64ClaimCSP.
      // TODO(arm64): remove when JSSP is gone.
      return kHasSideEffect;
#else
      return kNoOpcodeFlags;
#endif

    case kArchStackPointer:
      // ArchStackPointer instruction loads the current stack pointer value and
      // must not be reordered with instruction with side effects.
      return kIsLoadOperation;

    // Calls may read and write arbitrary state.
    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
      return kHasSideEffect;

    // Tail calls both have side effects and end the block.
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
      return kHasSideEffect | kIsBlockTerminator;

    // Control-flow opcodes: must stay at the end of the block.
    case kArchDeoptimize:
    case kArchJmp:
    case kArchLookupSwitch:
    case kArchTableSwitch:
    case kArchRet:
    case kArchDebugAbort:
    case kArchDebugBreak:
    case kArchThrowTerminator:
      return kIsBlockTerminator;

    // Checked memory accesses: loads vs. stores.
    case kCheckedLoadInt8:
    case kCheckedLoadUint8:
    case kCheckedLoadInt16:
    case kCheckedLoadUint16:
    case kCheckedLoadWord32:
    case kCheckedLoadWord64:
    case kCheckedLoadFloat32:
    case kCheckedLoadFloat64:
      return kIsLoadOperation;

    case kCheckedStoreWord8:
    case kCheckedStoreWord16:
    case kCheckedStoreWord32:
    case kCheckedStoreWord64:
    case kCheckedStoreFloat32:
    case kCheckedStoreFloat64:
    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    // Atomic loads are loads; atomic stores and read-modify-write operations
    // are treated as side effects so they are never reordered with each other.
    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return kIsLoadOperation;

    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return kHasSideEffect;

    case kAtomicExchangeInt8:
    case kAtomicExchangeUint8:
    case kAtomicExchangeInt16:
    case kAtomicExchangeUint16:
    case kAtomicExchangeWord32:
    case kAtomicCompareExchangeInt8:
    case kAtomicCompareExchangeUint8:
    case kAtomicCompareExchangeInt16:
    case kAtomicCompareExchangeUint16:
    case kAtomicCompareExchangeWord32:
    case kAtomicAddInt8:
    case kAtomicAddUint8:
    case kAtomicAddInt16:
    case kAtomicAddUint16:
    case kAtomicAddWord32:
    case kAtomicSubInt8:
    case kAtomicSubUint8:
    case kAtomicSubInt16:
    case kAtomicSubUint16:
    case kAtomicSubWord32:
    case kAtomicAndInt8:
    case kAtomicAndUint8:
    case kAtomicAndInt16:
    case kAtomicAndUint16:
    case kAtomicAndWord32:
    case kAtomicOrInt8:
    case kAtomicOrUint8:
    case kAtomicOrInt16:
    case kAtomicOrUint16:
    case kAtomicOrWord32:
    case kAtomicXorInt8:
    case kAtomicXorUint8:
    case kAtomicXorInt16:
    case kAtomicXorUint16:
    case kAtomicXorWord32:
      return kHasSideEffect;

// All remaining (architecture-specific) opcodes are classified by the
// per-backend implementation.
#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}
392 :
393 :
394 0 : bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
395 0 : return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
396 0 : (instr->flags_mode() == kFlags_branch));
397 : }
398 :
399 :
400 0 : void InstructionScheduler::ComputeTotalLatencies() {
401 0 : for (ScheduleGraphNode* node : base::Reversed(graph_)) {
402 : int max_latency = 0;
403 :
404 0 : for (ScheduleGraphNode* successor : node->successors()) {
405 : DCHECK_NE(-1, successor->total_latency());
406 0 : if (successor->total_latency() > max_latency) {
407 : max_latency = successor->total_latency();
408 : }
409 : }
410 :
411 0 : node->set_total_latency(max_latency + node->latency());
412 : }
413 0 : }
414 :
415 : } // namespace compiler
416 : } // namespace internal
417 : } // namespace v8
|