// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
#include "src/isolate.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

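// Because AddNode() keeps the ready list sorted by decreasing total latency,
// the first node whose operands are ready (start_cycle <= cycle) is the one
// on the longest remaining dependency path, i.e. the critical path.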
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

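// total_latency_ and start_cycle_ start out as -1: ComputeTotalLatencies()
// fills in the former, and ScheduleBlock() raises the latter as predecessors
// get scheduled (-1 means the node is ready at any cycle).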
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

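// The scheduler works one basic block at a time. A rough sketch of the
// expected calling sequence (the exact driver, e.g. the instruction selector,
// depends on the embedding pipeline and on instruction scheduling being
// enabled):
//
//   scheduler.StartBlock(rpo);
//   scheduler.AddInstruction(instr);   // for every non-terminator instruction
//   scheduler.AddTerminator(instr);    // for block terminators
//   scheduler.EndBlock(rpo);           // schedules and emits the block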
InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_or_trap_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

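// Builds the dependency edges for one instruction: ordering against live-in
// register markers, the last deopt/trap point, side-effecting instructions and
// pending loads, plus def-use edges found through operands_map_.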
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effect instructions, but
      // independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

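// List scheduling: each loop iteration corresponds to one machine cycle. The
// best ready node is popped and emitted, and its successors' start cycles are
// raised past its latency. If nothing is ready in the current cycle, the
// cycle counter still advances, modelling a stall.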
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}

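// Classifies an instruction as a load and/or as having side effects; these
// flags drive the ordering edges added in AddInstruction(). Target-specific
// opcodes fall through to GetTargetInstructionFlags().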
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchLookupSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The ArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with instructions that have side
      // effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchCallBuiltinPointer:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchDebugAbort:
    case kArchDebugBreak:
      return kHasSideEffect;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

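// graph_ holds nodes in emission order and every edge points from an earlier
// node to a later one, so walking it in reverse visits all successors of a
// node before the node itself; each total latency can therefore be computed
// in a single pass.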
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8