// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/js-inlining-heuristic.h"

#include "src/compiler/common-operator.h"
#include "src/compiler/compiler-source-position-table.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/simplified-operator.h"
#include "src/objects-inl.h"
#include "src/optimized-compilation-info.h"

namespace v8 {
namespace internal {
namespace compiler {

#define TRACE(...)                                      \
  do {                                                  \
    if (FLAG_trace_turbo_inlining) PrintF(__VA_ARGS__); \
  } while (false)

namespace {

bool IsSmallInlineFunction(BytecodeArrayRef bytecode) {
  // Forcibly inline small functions.
  if (bytecode.length() <= FLAG_max_inlined_bytecode_size_small) {
    return true;
  }
  return false;
}

}  // namespace

JSInliningHeuristic::Candidate JSInliningHeuristic::CollectFunctions(
    Node* node, int functions_size) {
  DCHECK_NE(0, functions_size);
  Node* callee = node->InputAt(0);
  Candidate out;
  out.node = node;

  HeapObjectMatcher m(callee);
  if (m.HasValue() && m.Ref(broker()).IsJSFunction()) {
    out.functions[0] = m.Ref(broker()).AsJSFunction();
    JSFunctionRef function = out.functions[0].value();
    if (function.IsSerializedForCompilation()) {
      out.bytecode[0] = function.shared().GetBytecodeArray();
    }
    out.num_functions = 1;
    return out;
  }
  if (m.IsPhi()) {
    int const value_input_count = m.node()->op()->ValueInputCount();
    if (value_input_count > functions_size) {
      out.num_functions = 0;
      return out;
    }
    for (int n = 0; n < value_input_count; ++n) {
      HeapObjectMatcher m(callee->InputAt(n));
      if (!m.HasValue() || !m.Ref(broker()).IsJSFunction()) {
        out.num_functions = 0;
        return out;
      }

      out.functions[n] = m.Ref(broker()).AsJSFunction();
      JSFunctionRef function = out.functions[n].value();
      if (function.IsSerializedForCompilation()) {
        out.bytecode[n] = function.shared().GetBytecodeArray();
      }
    }
    out.num_functions = value_input_count;
    return out;
  }
  if (m.IsJSCreateClosure()) {
    CreateClosureParameters const& p = CreateClosureParametersOf(m.op());
    DCHECK(!out.functions[0].has_value());
    out.shared_info = SharedFunctionInfoRef(broker(), p.shared_info());
    SharedFunctionInfoRef shared_info = out.shared_info.value();
    if (shared_info.HasBytecodeArray()) {
      out.bytecode[0] = shared_info.GetBytecodeArray();
    }
    out.num_functions = 1;
    return out;
  }
  out.num_functions = 0;
  return out;
}
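
// For illustration only (a hand-written sketch, not part of the original
// source): the three callee shapes recognized above roughly correspond to
// JavaScript call sites such as
//
//   f();              // constant JSFunction   -> one known function
//   (c ? f : g)();    // Phi over JSFunctions  -> polymorphic set of targets
//   (() => 0)();      // JSCreateClosure       -> only the SharedFunctionInfo
//                     //                          (and its bytecode) is known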

Reduction JSInliningHeuristic::Reduce(Node* node) {
  DisallowHeapAccessIf no_heap_access(FLAG_concurrent_inlining);

  if (!IrOpcode::IsInlineeOpcode(node->opcode())) return NoChange();

  // Check if we already saw that {node} before, and if so, just skip it.
  if (seen_.find(node->id()) != seen_.end()) return NoChange();
  seen_.insert(node->id());

  // Check if the {node} is an appropriate candidate for inlining.
  Candidate candidate = CollectFunctions(node, kMaxCallPolymorphism);
  if (candidate.num_functions == 0) {
    return NoChange();
  } else if (candidate.num_functions > 1 && !FLAG_polymorphic_inlining) {
    TRACE(
        "Not considering call site #%d:%s, because polymorphic inlining "
        "is disabled\n",
        node->id(), node->op()->mnemonic());
    return NoChange();
  }

  bool can_inline = false, force_inline_small = true;
  candidate.total_size = 0;
  Node* frame_state = NodeProperties::GetFrameStateInput(node);
  FrameStateInfo const& frame_info = FrameStateInfoOf(frame_state->op());
  Handle<SharedFunctionInfo> frame_shared_info;
  for (int i = 0; i < candidate.num_functions; ++i) {
    if (!candidate.bytecode[i].has_value()) {
      // We're already missing critical data which wouldn't allow us to
      // continue the inlining checks. Log a warning and continue.
      if (candidate.functions[i].has_value()) {
        TRACE_BROKER(broker(),
                     "Missing bytecode array trying to inline function "
                         << candidate.functions[i].value().object().address());
      } else {
        TRACE_BROKER(
            broker(),
            "Missing bytecode array trying to inline function with SFI "
                << candidate.shared_info.value().object().address());
      }
      // Functions that don't have their bytecode serialized probably don't
      // have the SFI either, so we skip the remaining checks and move on to
      // the next function.
      candidate.can_inline_function[i] = false;
      continue;
    }

    SharedFunctionInfoRef shared = candidate.functions[i].has_value()
                                       ? candidate.functions[i].value().shared()
                                       : candidate.shared_info.value();
    candidate.can_inline_function[i] = shared.IsInlineable();
    // Do not allow direct recursion, i.e. f() -> f(). We still allow indirect
    // recursion like f() -> g() -> f(). Indirect recursion is helpful in
    // cases where f() is a small dispatch function that calls the appropriate
    // function. In the case of direct recursion, we only have static
    // information for the first level of inlining, and it may not be that
    // useful to inline just one level of a recursive call. In some cases,
    // like tail recursion, we may benefit from recursive inlining if we have
    // additional analysis that converts the recursion to an iterative
    // implementation, though it is not obvious whether such an analysis is
    // needed.
    if (frame_info.shared_info().ToHandle(&frame_shared_info) &&
        frame_shared_info.equals(shared.object())) {
      TRACE("Not considering call site #%d:%s, because of recursive inlining\n",
            node->id(), node->op()->mnemonic());
      candidate.can_inline_function[i] = false;
    }
    // A function reaching this point should always have its bytecode
    // serialized.
    BytecodeArrayRef bytecode = candidate.bytecode[i].value();
    if (candidate.can_inline_function[i]) {
      can_inline = true;
      candidate.total_size += bytecode.length();
    }
    // Forced inlining of small functions applies only when every target
    // function is small.
    if (!IsSmallInlineFunction(bytecode)) {
      force_inline_small = false;
    }
  }
  if (!can_inline) return NoChange();

  // Gather feedback on how often this call site has been hit before.
  if (node->opcode() == IrOpcode::kJSCall) {
    CallParameters const p = CallParametersOf(node->op());
    candidate.frequency = p.frequency();
  } else {
    ConstructParameters const p = ConstructParametersOf(node->op());
    candidate.frequency = p.frequency();
  }

  // Handle the special inlining modes right away:
  // - For restricted inlining: stop all handling at this point.
  // - For stressing inlining: immediately handle all functions.
  switch (mode_) {
    case kRestrictedInlining:
      return NoChange();
    case kStressInlining:
      return InlineCandidate(candidate, false);
    case kGeneralInlining:
      break;
  }

  // Don't consider a {candidate} whose frequency is below the
  // threshold, i.e. a call site that is only hit once every N
  // invocations of the caller.
  if (candidate.frequency.IsKnown() &&
      candidate.frequency.value() < FLAG_min_inlining_frequency) {
    return NoChange();
  }

  // Forcibly inline small functions here. In the case of polymorphic
  // inlining, force_inline_small is set only when all functions are small.
  if (force_inline_small &&
      cumulative_count_ < FLAG_max_inlined_bytecode_size_absolute) {
    TRACE("Inlining small function(s) at call site #%d:%s\n", node->id(),
          node->op()->mnemonic());
    return InlineCandidate(candidate, true);
  }

  // In the general case we remember the candidate for later.
  candidates_.insert(candidate);
  return NoChange();
}
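
// A rough walkthrough of the decision ladder above, using hypothetical
// numbers: a monomorphic call whose target has 25 bytes of bytecode and a
// known call frequency of 4 passes the frequency check (assuming
// FLAG_min_inlining_frequency <= 4); if 25 is within
// FLAG_max_inlined_bytecode_size_small and the absolute budget is not yet
// exhausted, the call is inlined immediately, otherwise it is remembered in
// candidates_ and reconsidered in Finalize() below.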

void JSInliningHeuristic::Finalize() {
  if (candidates_.empty()) return;  // Nothing to do without candidates.
  if (FLAG_trace_turbo_inlining) PrintCandidates();

  // We inline at most one candidate in every iteration of the fixpoint.
  // This is to ensure that we don't consume the full inlining budget
  // on things that aren't called very often.
  // TODO(bmeurer): Use std::priority_queue instead of std::set here.
  while (!candidates_.empty()) {
    auto i = candidates_.begin();
    Candidate candidate = *i;
    candidates_.erase(i);

    // Make sure we have some extra budget left, so that any small functions
    // exposed by this candidate still get a chance to be inlined.
    double size_of_candidate =
        candidate.total_size * FLAG_reserve_inline_budget_scale_factor;
    int total_size = cumulative_count_ + static_cast<int>(size_of_candidate);
    if (total_size > FLAG_max_inlined_bytecode_size_cumulative) {
      // Skip this candidate and see whether a smaller one still fits the
      // budget.
      continue;
    }

    // Make sure we don't try to inline dead candidate nodes.
    if (!candidate.node->IsDead()) {
      Reduction const reduction = InlineCandidate(candidate, false);
      if (reduction.Changed()) return;
    }
  }
}
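
// Worked example of the budget check above (hypothetical numbers): with
// cumulative_count_ == 900, candidate.total_size == 120 and a reserve scale
// factor of, say, 1.2, the projected total is 900 + 144 = 1044, so the
// candidate is skipped unless FLAG_max_inlined_bytecode_size_cumulative is
// at least 1044.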

namespace {

struct NodeAndIndex {
  Node* node;
  int index;
};

bool CollectStateValuesOwnedUses(Node* node, Node* state_values,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (state_values->UseCount() > 1) return true;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    if (input->opcode() == IrOpcode::kStateValues) {
      if (!CollectStateValuesOwnedUses(node, input, uses_buffer, use_count,
                                       max_uses)) {
        return false;
      }
    } else if (input == node) {
      if (*use_count >= max_uses) return false;
      uses_buffer[*use_count] = {state_values, i};
      (*use_count)++;
    }
  }
  return true;
}

}  // namespace

Node* JSInliningHeuristic::DuplicateStateValuesAndRename(Node* state_values,
                                                         Node* from, Node* to,
                                                         StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectStateValuesOwnedUses}.
  if (state_values->UseCount() > 1) return state_values;
  Node* copy = mode == kChangeInPlace ? state_values : nullptr;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    Node* processed;
    if (input->opcode() == IrOpcode::kStateValues) {
      processed = DuplicateStateValuesAndRename(input, from, to, mode);
    } else if (input == from) {
      processed = to;
    } else {
      processed = input;
    }
    if (processed != input) {
      if (!copy) {
        copy = graph()->CloneNode(state_values);
      }
      copy->ReplaceInput(i, processed);
    }
  }
  return copy ? copy : state_values;
}
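
// Note that the duplication above is effectively copy-on-write: in
// kCloneState mode a copy is only made once the first input actually needs
// renaming, and a StateValues node with no renamed inputs is returned
// unchanged.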

namespace {

bool CollectFrameStateUniqueUses(Node* node, Node* frame_state,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (frame_state->UseCount() > 1) return true;
  if (frame_state->InputAt(kFrameStateStackInput) == node) {
    if (*use_count >= max_uses) return false;
    uses_buffer[*use_count] = {frame_state, kFrameStateStackInput};
    (*use_count)++;
  }
  if (!CollectStateValuesOwnedUses(node,
                                   frame_state->InputAt(kFrameStateLocalsInput),
                                   uses_buffer, use_count, max_uses)) {
    return false;
  }
  return true;
}

}  // namespace

Node* JSInliningHeuristic::DuplicateFrameStateAndRename(Node* frame_state,
                                                        Node* from, Node* to,
                                                        StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectFrameStateUniqueUses}.
  if (frame_state->UseCount() > 1) return frame_state;
  Node* copy = mode == kChangeInPlace ? frame_state : nullptr;
  if (frame_state->InputAt(kFrameStateStackInput) == from) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(kFrameStateStackInput, to);
  }
  Node* locals = frame_state->InputAt(kFrameStateLocalsInput);
  Node* new_locals = DuplicateStateValuesAndRename(locals, from, to, mode);
  if (new_locals != locals) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(kFrameStateLocalsInput, new_locals);
  }
  return copy ? copy : frame_state;
}
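
// Note that the {mode} parameter lets the last duplicated branch take over
// the original state in place (kChangeInPlace) while all earlier branches
// receive clones (kCloneState); see the
//   (i == num_calls - 1) ? kChangeInPlace : kCloneState
// call sites in TryReuseDispatch below.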

bool JSInliningHeuristic::TryReuseDispatch(Node* node, Node* callee,
                                           Candidate const& candidate,
                                           Node** if_successes, Node** calls,
                                           Node** inputs, int input_count) {
  // We will try to reuse the control flow branch created for computing
  // the {callee} target of the call. We only reuse the branch if there
  // is no side-effect between the call and the branch, and if the callee is
  // only used as the target (and possibly also in the related frame states).

  int const num_calls = candidate.num_functions;

  DCHECK_EQ(IrOpcode::kPhi, callee->opcode());
  DCHECK_EQ(num_calls, callee->op()->ValueInputCount());

  // We are trying to match the following pattern:
  //
  //         C1    C2
  //          .    .
  //          |    |
  //         Merge(merge)  <-----------------+
  //           ^    ^                        |
  //  V1  V2   |    |         E1  E2         |
  //   .   .   |    +----+     .   .         |
  //   |   |   |         |     |   |         |
  //   Phi(callee)      EffectPhi(effect_phi)|
  //     ^                    ^              |
  //     |                    |              |
  //     +----+               |              |
  //     |    |               |              |
  //     |   StateValues      |              |
  //     |       ^            |              |
  //     +----+  |            |              |
  //     |    |  |            |              |
  //     |   FrameState       |              |
  //     |       ^            |              |
  //     |       |            |          +---+
  //     |       |            |          |
  //     +----+  Checkpoint(checkpoint)  |
  //     |    |       ^                  |
  //     |   StateValues  |  +-----------+
  //     |        |       |  |
  //     +-----+  |       |  |
  //     |     |  |       |  |
  //     |    FrameState  |  |
  //     |        ^       |  |
  //     +--------+       |  |
  //          Call(node)
  //           |
  //           |
  //           .
  //
  // The {callee} here is a phi that merges the possible call targets, {node}
  // is the actual call that we will try to duplicate and connect to the
  // control that comes into {merge}. There can be a {checkpoint} between
  // the call and the callee phi.
  //
  // The idea is to get rid of the merge, effect phi and phi, then duplicate
  // the call (with all the frame states and such), and connect the duplicated
  // calls and states directly to the inputs of the ex-phi, ex-effect-phi and
  // ex-merge. The tricky part is to make sure that there is no interference
  // from the outside. In particular, there should not be any unaccounted uses
  // of the phi, effect-phi and merge because we will remove them from
  // the graph.
  //
  //     V1              E1   C1  V2              E2   C2
  //     .               .    .   .               .    .
  //     |               |    |   |               |    |
  //     +----+          |    |   +----+          |    |
  //     |    |          |    |   |    |          |    |
  //     |   StateValues |    |   |   StateValues |    |
  //     |       ^       |    |   |       ^       |    |
  //     +----+  |       |    |   +----+  |       |    |
  //     |    |  |       |    |   |    |  |       |    |
  //     |   FrameState  |    |   |   FrameState  |    |
  //     |       ^       |    |   |       ^       |    |
  //     |       |       |    |   |       |       |    |
  //     |       |       |    |   |       |       |    |
  //     +----+  Checkpoint   |   +----+  Checkpoint   |
  //     |    |      ^        |   |    |      ^        |
  //     |   StateValues |    |   |   StateValues |    |
  //     |       |       |    |   |       |       |    |
  //     +-----+ |       |    |   +-----+ |       |    |
  //     |     | |       |    |   |     | |       |    |
  //     |    FrameState |    |   |    FrameState |    |
  //     |        ^      |    |   |        ^      |    |
  //     +--------+|     |    |   +--------+|     |    |
  //          Call----+           Call----+
  //           |                   |
  //           +-------+  +--------+
  //                   |  |
  //                  Merge
  //                  EffectPhi
  //                  Phi
  //                   |
  //                   ...
  // If there is a control node between the callee computation
  // and the call, bail out.
  Node* merge = NodeProperties::GetControlInput(callee);
  if (NodeProperties::GetControlInput(node) != merge) return false;

  // If there is a non-checkpoint effect node between the callee computation
  // and the call, bail out. We will drop any checkpoint between the call and
  // the callee phi because the callee computation should have its own
  // checkpoint that the call can fall back to.
  Node* checkpoint = nullptr;
  Node* effect = NodeProperties::GetEffectInput(node);
  if (effect->opcode() == IrOpcode::kCheckpoint) {
    checkpoint = effect;
    if (NodeProperties::GetControlInput(checkpoint) != merge) return false;
    effect = NodeProperties::GetEffectInput(effect);
  }
  if (effect->opcode() != IrOpcode::kEffectPhi) return false;
  if (NodeProperties::GetControlInput(effect) != merge) return false;
  Node* effect_phi = effect;

  // The effect phi, the callee, the call and the checkpoint must be the only
  // users of the merge.
  for (Node* merge_use : merge->uses()) {
    if (merge_use != effect_phi && merge_use != callee && merge_use != node &&
        merge_use != checkpoint) {
      return false;
    }
  }

  // The effect phi must only be used by the checkpoint or the call.
  for (Node* effect_phi_use : effect_phi->uses()) {
    if (effect_phi_use != node && effect_phi_use != checkpoint) return false;
  }

  // We must replace the callee phi with the appropriate constant in
  // the entire subgraph reachable by inputs from the call (terminating
  // at phis and merges). Since we do not want to walk (and later duplicate)
  // the subgraph here, we limit the possible uses to this set:
  //
  // 1. In the call (as a target).
  // 2. The checkpoint between the call and the callee computation merge.
  // 3. The lazy deoptimization frame state.
  //
  // This corresponds to the most common pattern, where the function is
  // called with only local variables or constants as arguments.
  //
  // To check the uses, we first collect all the occurrences of callee in 1, 2
  // and 3, and then we check that all uses of callee are in the collected
  // occurrences. If there is an unaccounted use, we do not try to rewire
  // the control flow.
  //
  // Note: With CFG, this would be much easier and more robust - we would just
  // duplicate all the nodes between the merge and the call, replacing all
  // occurrences of the {callee} phi with the appropriate constant.

  // First compute the set of uses that are only reachable from 2 and 3.
  const size_t kMaxUses = 8;
  NodeAndIndex replaceable_uses[kMaxUses];
  size_t replaceable_uses_count = 0;

  // Collect the uses to check case 2.
  Node* checkpoint_state = nullptr;
  if (checkpoint) {
    checkpoint_state = checkpoint->InputAt(0);
    if (!CollectFrameStateUniqueUses(callee, checkpoint_state, replaceable_uses,
                                     &replaceable_uses_count, kMaxUses)) {
      return false;
    }
  }

  // Collect the uses to check case 3.
  Node* frame_state = NodeProperties::GetFrameStateInput(node);
  if (!CollectFrameStateUniqueUses(callee, frame_state, replaceable_uses,
                                   &replaceable_uses_count, kMaxUses)) {
    return false;
  }

  // Bail out if there is a use of {callee} that is not reachable from 1, 2
  // and 3.
  for (Edge edge : callee->use_edges()) {
    // Case 1 (use by the call as a target).
    if (edge.from() == node && edge.index() == 0) continue;
    // Cases 2 and 3 (used in checkpoint and/or lazy deopt frame states).
    bool found = false;
    for (size_t i = 0; i < replaceable_uses_count; i++) {
      if (replaceable_uses[i].node == edge.from() &&
          replaceable_uses[i].index == edge.index()) {
        found = true;
        break;
      }
    }
    if (!found) return false;
  }

  // Clone the call and the frame state, including the uniquely reachable
  // state values, making sure that we replace the phi with the constant.
  for (int i = 0; i < num_calls; ++i) {
    // Clone the calls for each branch.
    // We need to specialize the calls to the correct target, effect, and
    // control. We also need to duplicate the checkpoint and the lazy
    // frame state, and change all the uses of the callee to the constant
    // callee.
    Node* target = callee->InputAt(i);
    Node* effect = effect_phi->InputAt(i);
    Node* control = merge->InputAt(i);

    if (checkpoint) {
      // Duplicate the checkpoint.
      Node* new_checkpoint_state = DuplicateFrameStateAndRename(
          checkpoint_state, callee, target,
          (i == num_calls - 1) ? kChangeInPlace : kCloneState);
      effect = graph()->NewNode(checkpoint->op(), new_checkpoint_state, effect,
                                control);
    }

    // Duplicate the call.
    Node* new_lazy_frame_state = DuplicateFrameStateAndRename(
        frame_state, callee, target,
        (i == num_calls - 1) ? kChangeInPlace : kCloneState);
    inputs[0] = target;
    inputs[input_count - 3] = new_lazy_frame_state;
    inputs[input_count - 2] = effect;
    inputs[input_count - 1] = control;
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }

  // Mark the control inputs dead, so that we can kill the merge.
  node->ReplaceInput(input_count - 1, jsgraph()->Dead());
  callee->ReplaceInput(num_calls, jsgraph()->Dead());
  effect_phi->ReplaceInput(num_calls, jsgraph()->Dead());
  if (checkpoint) {
    checkpoint->ReplaceInput(2, jsgraph()->Dead());
  }

  merge->Kill();
  return true;
}
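
// Note that kMaxUses above bounds how many occurrences of {callee} are
// collected from the frame states: if the callee shows up more than eight
// times, CollectFrameStateUniqueUses reports failure and the caller falls
// back to the branch-building path in CreateOrReuseDispatch below.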

void JSInliningHeuristic::CreateOrReuseDispatch(Node* node, Node* callee,
                                                Candidate const& candidate,
                                                Node** if_successes,
                                                Node** calls, Node** inputs,
                                                int input_count) {
  SourcePositionTable::Scope position(
      source_positions_, source_positions_->GetSourcePosition(node));
  if (TryReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
                       input_count)) {
    return;
  }

  Node* fallthrough_control = NodeProperties::GetControlInput(node);
  int const num_calls = candidate.num_functions;

  // Create the appropriate control flow to dispatch to the cloned calls.
  for (int i = 0; i < num_calls; ++i) {
    // TODO(2206): Make comparison be based on underlying SharedFunctionInfo
    // instead of the target JSFunction reference directly.
    Node* target = jsgraph()->Constant(candidate.functions[i].value());
    if (i != (num_calls - 1)) {
      Node* check =
          graph()->NewNode(simplified()->ReferenceEqual(), callee, target);
      Node* branch =
          graph()->NewNode(common()->Branch(), check, fallthrough_control);
      fallthrough_control = graph()->NewNode(common()->IfFalse(), branch);
      if_successes[i] = graph()->NewNode(common()->IfTrue(), branch);
    } else {
      if_successes[i] = fallthrough_control;
    }

    // Clone the calls for each branch.
    // The first input to the call is the actual target (which we specialize
    // to the known {target}); the last input is the control dependency.
    // We also specialize the new.target of JSConstruct {node}s if it refers
    // to the same node as the {node}'s target input, so that we can later
    // properly inline the JSCreate operations.
    if (node->opcode() == IrOpcode::kJSConstruct && inputs[0] == inputs[1]) {
      inputs[1] = target;
    }
    inputs[0] = target;
    inputs[input_count - 1] = if_successes[i];
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }
}
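
// For two known targets f and g, the dispatch constructed above is roughly:
//
//   ReferenceEqual(callee, f) --Branch--> IfTrue  --> call f
//                                     \-> IfFalse --> call g
//
// i.e. the last target is reached on the fallthrough control edge and needs
// no comparison of its own.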

Reduction JSInliningHeuristic::InlineCandidate(Candidate const& candidate,
                                               bool small_function) {
  int const num_calls = candidate.num_functions;
  Node* const node = candidate.node;
  if (num_calls == 1) {
    Reduction const reduction = inliner_.ReduceJSCall(node);
    if (reduction.Changed()) {
      cumulative_count_ += candidate.bytecode[0].value().length();
    }
    return reduction;
  }

  // Expand the JSCall/JSConstruct node to a subgraph first if
  // we have multiple known target functions.
  DCHECK_LT(1, num_calls);
  Node* calls[kMaxCallPolymorphism + 1];
  Node* if_successes[kMaxCallPolymorphism];
  Node* callee = NodeProperties::GetValueInput(node, 0);

  // Set up the inputs for the cloned call nodes.
  int const input_count = node->InputCount();
  Node** inputs = graph()->zone()->NewArray<Node*>(input_count);
  for (int i = 0; i < input_count; ++i) {
    inputs[i] = node->InputAt(i);
  }

  // Create the appropriate control flow to dispatch to the cloned calls.
  CreateOrReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
                        input_count);

  // Check if we have an exception projection for the call {node}.
  Node* if_exception = nullptr;
  if (NodeProperties::IsExceptionalCall(node, &if_exception)) {
    Node* if_exceptions[kMaxCallPolymorphism + 1];
    for (int i = 0; i < num_calls; ++i) {
      if_successes[i] = graph()->NewNode(common()->IfSuccess(), calls[i]);
      if_exceptions[i] =
          graph()->NewNode(common()->IfException(), calls[i], calls[i]);
    }

    // Morph the {if_exception} projection into a join.
    Node* exception_control =
        graph()->NewNode(common()->Merge(num_calls), num_calls, if_exceptions);
    if_exceptions[num_calls] = exception_control;
    Node* exception_effect = graph()->NewNode(common()->EffectPhi(num_calls),
                                              num_calls + 1, if_exceptions);
    Node* exception_value = graph()->NewNode(
        common()->Phi(MachineRepresentation::kTagged, num_calls), num_calls + 1,
        if_exceptions);
    ReplaceWithValue(if_exception, exception_value, exception_effect,
                     exception_control);
  }

  // Morph the original call site into a join of the dispatched call sites.
  Node* control =
      graph()->NewNode(common()->Merge(num_calls), num_calls, if_successes);
  calls[num_calls] = control;
  Node* effect =
      graph()->NewNode(common()->EffectPhi(num_calls), num_calls + 1, calls);
  Node* value =
      graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, num_calls),
                       num_calls + 1, calls);
  ReplaceWithValue(node, value, effect, control);

  // Inline the individual, cloned call sites.
  for (int i = 0; i < num_calls; ++i) {
    Node* node = calls[i];
    if (candidate.can_inline_function[i] &&
        (small_function ||
         cumulative_count_ < FLAG_max_inlined_bytecode_size_cumulative)) {
      Reduction const reduction = inliner_.ReduceJSCall(node);
      if (reduction.Changed()) {
        // Killing the call node is not strictly necessary, but it is safer to
        // make sure we do not resurrect the node.
        node->Kill();
        // Small functions don't count towards the budget.
        if (!small_function) {
          cumulative_count_ += candidate.bytecode[i]->length();
        }
      }
    }
  }

  return Replace(value);
}

bool JSInliningHeuristic::CandidateCompare::operator()(
    const Candidate& left, const Candidate& right) const {
  if (right.frequency.IsUnknown()) {
    if (left.frequency.IsUnknown()) {
      // If left and right are both unknown then the ordering is indeterminate,
      // which breaks strict weak ordering requirements, so we fall back to the
      // node id as a tie breaker.
      return left.node->id() > right.node->id();
    }
    return true;
  } else if (left.frequency.IsUnknown()) {
    return false;
  } else if (left.frequency.value() > right.frequency.value()) {
    return true;
  } else if (left.frequency.value() < right.frequency.value()) {
    return false;
  } else {
    return left.node->id() > right.node->id();
  }
}
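
// With this comparator, iterating candidates_ (a std::set) visits
// higher-frequency candidates first; candidates with unknown frequency sort
// last, and ties are broken by descending node id so that the ordering
// remains a strict weak order.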

void JSInliningHeuristic::PrintCandidates() {
  StdoutStream os;
  os << "Candidates for inlining (size=" << candidates_.size() << "):\n";
  for (const Candidate& candidate : candidates_) {
    os << "  #" << candidate.node->id() << ":"
       << candidate.node->op()->mnemonic()
       << ", frequency: " << candidate.frequency << std::endl;
    for (int i = 0; i < candidate.num_functions; ++i) {
      SharedFunctionInfoRef shared =
          candidate.functions[i].has_value()
              ? candidate.functions[i].value().shared()
              : candidate.shared_info.value();
      PrintF("  - size:%d, name: %s\n", candidate.bytecode[i].value().length(),
             shared.object()->DebugName()->ToCString().get());
    }
  }
}

Graph* JSInliningHeuristic::graph() const { return jsgraph()->graph(); }

CommonOperatorBuilder* JSInliningHeuristic::common() const {
  return jsgraph()->common();
}

SimplifiedOperatorBuilder* JSInliningHeuristic::simplified() const {
  return jsgraph()->simplified();
}

#undef TRACE

}  // namespace compiler
}  // namespace internal
}  // namespace v8