Coverage Report

Created: 2025-01-28 06:38

/src/hermes/lib/BCGen/RegAlloc.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#include "hermes/BCGen/RegAlloc.h"
9
#include "hermes/IR/CFG.h"
10
#include "hermes/IR/IR.h"
11
#include "hermes/IR/IRBuilder.h"
12
#include "hermes/IR/Instrs.h"
13
#include "hermes/Support/PerfSection.h"
14
#include "hermes/Utils/Dumper.h"
15
16
#include "llvh/Support/Debug.h"
17
#include "llvh/Support/raw_ostream.h"
18
19
#include <algorithm>
20
#include <queue>
21
22
#define DEBUG_TYPE "regalloc"
23
24
using namespace hermes;
25
26
using llvh::dbgs;
27
28
0
raw_ostream &llvh::operator<<(raw_ostream &OS, const Register &reg) {
29
0
  if (!reg.isValid()) {
30
0
    OS << "Null";
31
0
  } else {
32
0
    OS << "Reg" << reg.getIndex();
33
0
  }
34
35
0
  return OS;
36
0
}
37
38
0
raw_ostream &llvh::operator<<(raw_ostream &OS, const Segment &segment) {
39
0
  if (segment.empty()) {
40
0
    OS << "[empty]";
41
0
    return OS;
42
0
  }
43
44
0
  OS << "[" << segment.start_ << "..." << segment.end_ << ") ";
45
0
  return OS;
46
0
}
47
48
0
raw_ostream &llvh::operator<<(raw_ostream &OS, const Interval &interval) {
49
0
  Interval t = interval.compress();
50
0
  for (auto &s : t.segments_) {
51
0
    OS << s;
52
0
  }
53
0
  return OS;
54
0
}
55
56
9.03M
bool RegisterFile::isUsed(Register r) {
57
9.03M
  return !registers.test(r.getIndex());
58
9.03M
}
59
60
5.40M
bool RegisterFile::isFree(Register r) {
61
5.40M
  return !isUsed(r);
62
5.40M
}
63
64
2.89M
void RegisterFile::killRegister(Register reg) {
65
2.89M
  LLVM_DEBUG(dbgs() << "-- Releasing the register " << reg << "\n");
66
67
2.89M
  assert(isUsed(reg) && "Killing an unused register!");
68
2.89M
  registers.set(reg.getIndex());
69
2.89M
  assert(isFree(reg) && "Error freeing register!");
70
2.89M
}
71
72
0
void RegisterFile::verify() {}
73
74
0
void RegisterFile::dump() {
75
0
  llvh::outs() << "\n";
76
0
  for (unsigned i = 0; i < registers.size(); i++) {
77
0
    llvh::outs() << (int)!registers.test(i);
78
0
  }
79
0
  llvh::outs() << "\n";
80
0
}
81
82
3.24M
Register RegisterFile::allocateRegister() {
83
  // We first check for the 'all' case because we usually have a small number of
84
  // active bits (<64), so this operation is actually faster than the linear
85
  // scan.
86
3.24M
  if (registers.none()) {
87
    // If all bits are set, create a new register and return it.
88
735k
    unsigned numRegs = registers.size();
89
735k
    registers.resize(numRegs + 1, false);
90
735k
    Register R = Register(numRegs);
91
735k
    assert(isUsed(R) && "Error allocating a new register.");
92
735k
    LLVM_DEBUG(dbgs() << "-- Creating the new register " << R << "\n");
93
735k
    return R;
94
735k
  }
95
96
  // Search for a free register to use.
97
2.50M
  int i = registers.find_first();
98
2.50M
  assert(i >= 0 && "Unexpected failure to allocate a register");
99
2.50M
  Register R(i);
100
2.50M
  LLVM_DEBUG(dbgs() << "-- Assigning the free register " << R << "\n");
101
2.50M
  assert(isFree(R) && "Error finding a free register");
102
2.50M
  registers.reset(i);
103
2.50M
  return R;
104
2.50M
}
105
106
103k
Register RegisterFile::tailAllocateConsecutive(unsigned n) {
107
103k
  assert(n > 0 && "Can't request zero registers");
108
109
103k
  int lastUsed = registers.size() - 1;
110
103k
  while (lastUsed >= 0) {
111
0
    if (!registers.test(lastUsed))
112
0
      break;
113
114
0
    lastUsed--;
115
0
  }
116
117
103k
  int firstClear = lastUsed + 1;
118
119
103k
  LLVM_DEBUG(
120
103k
      dbgs() << "-- Found the last set bit at offset " << lastUsed << "\n");
121
103k
  registers.resize(std::max(registers.size(), firstClear + n), true);
122
103k
  registers.reset(firstClear, firstClear + n);
123
124
103k
  LLVM_DEBUG(
125
103k
      dbgs() << "-- Allocated tail consecutive registers of length " << n
126
103k
             << ", starting at " << Register(firstClear) << "\n");
127
103k
  return Register(firstClear);
128
103k
}
129
130
/// \returns true if the PHI node has an external user (that requires a
131
/// register read) and a local writer.
132
4.39k
static bool phiReadWrite(PhiInst *P) {
133
4.39k
  bool localPhiUse = false;
134
4.39k
  bool externalUse = false;
135
4.39k
  bool terminatorUse = false;
136
137
4.39k
  BasicBlock *parent = P->getParent();
138
139
4.39k
  for (auto *U : P->getUsers()) {
140
4.39k
    terminatorUse |= llvh::isa<TerminatorInst>(U);
141
4.39k
    localPhiUse |=
142
4.39k
        (llvh::isa<PhiInst>(U) && U->getParent() == parent && P != U);
143
4.39k
    externalUse |= U->getParent() != parent;
144
4.39k
  }
145
146
  // TODO: the code used to perform a stricter check which missed some cases.
147
  // TODO: need to come up with a better condition.
148
  // bool localWrite = false;
149
  // for (int i = 0, limit = P->getNumEntries(); !localWrite && i < limit; ++i)
150
  // {
151
  //   auto entry = P->getEntry(i);
152
  //   localWrite |= entry.first != P && entry.second == parent;
153
  // }
154
  // return terminatorUse || localPhiUse || (externalUse && localWrite);
155
156
4.39k
  return terminatorUse || localPhiUse || externalUse;
157
4.39k
}
158
159
103k
void RegisterAllocator::lowerPhis(ArrayRef<BasicBlock *> order) {
160
103k
  llvh::SmallVector<PhiInst *, 8> PHIs;
161
103k
  IRBuilder builder(F);
162
163
  // Collect all PHIs.
164
740k
  for (auto &BB : order) {
165
5.98M
    for (auto &Inst : *BB) {
166
5.98M
      if (auto *P = llvh::dyn_cast<PhiInst>(&Inst)) {
167
4.39k
        PHIs.push_back(P);
168
4.39k
      }
169
5.98M
    }
170
740k
  }
171
172
  // The MOV sequence at the end of the block writes the values that need to go
173
  // into the PHI node registers in the jump-destination basic blocks. In the
174
  // case of cycles we may need to read a value from a current PHI node and also
175
  // prepare the value of the same PHI node for the next iteration. To make sure
176
  // that we read an up-to-date value we copy the value using a MOV before we
177
  // emit the MOV sequence and replace all external uses.
178
103k
  for (PhiInst *P : PHIs) {
179
4.39k
    if (!phiReadWrite(P))
180
4.31k
      continue;
181
182
    // The MOV sequence may clobber the PHI. Insert a copy.
183
82
    builder.setInsertionPoint(P->getParent()->getTerminator());
184
82
    auto *mov = builder.createMovInst(P);
185
186
82
    Value::UseListTy users = P->getUsers();
187
    // Update all external users:
188
164
    for (auto *U : users) {
189
      // Local uses of the PHI are allowed.
190
164
      if (!llvh::isa<PhiInst>(U) && !llvh::isa<TerminatorInst>(U) &&
191
164
          U->getParent() == P->getParent())
192
82
        continue;
193
194
82
      U->replaceFirstOperandWith(P, mov);
195
82
    }
196
82
  }
197
198
  /// A list of registers that were copied to prevent clobbering. Maps the
199
  /// original PHI node to the copied value.
200
103k
  DenseMap<Value *, MovInst *> copied;
201
202
  // Lower all PHI nodes into a sequence of MOVs in the predecessor blocks.
203
103k
  for (PhiInst *P : PHIs) {
204
13.1k
    for (unsigned i = 0, e = P->getNumEntries(); i < e; ++i) {
205
8.78k
      auto E = P->getEntry(i);
206
8.78k
      auto *term = E.second->getTerminator();
207
8.78k
      builder.setInsertionPoint(term);
208
8.78k
      auto *mov = builder.createMovInst(E.first);
209
8.78k
      P->updateEntry(i, mov, E.second);
210
211
      // If the terminator uses the value that we are inserting then we can fix
212
      // the lifetime by making it use the MOV. We can do this because we know
213
      // that terminators don't modify values in destination PHI nodes and this
214
      // allows us to merge the lifetime of the value and save a register.
215
8.78k
      copied[E.first] = mov;
216
8.78k
    }
217
4.39k
  }
218
219
  // The terminator comes after the MOV sequence, so make sure it uses the
220
  // updated registers.
221
740k
  for (auto &BB : order) {
222
740k
    auto *term = BB->getTerminator();
223
224
1.70M
    for (int i = 0, e = term->getNumOperands(); i < e; i++) {
225
965k
      auto *op = term->getOperand(i);
226
965k
      if (llvh::isa<Literal>(op))
227
0
        continue;
228
965k
      auto it = copied.find(op);
229
965k
      if (it != copied.end()) {
230
0
        if (it->second->getParent() == BB) {
231
0
          term->setOperand(it->second, i);
232
0
          it->second->moveBefore(term);
233
0
        }
234
0
      }
235
965k
    }
236
740k
  }
237
103k
}
238
239
void RegisterAllocator::calculateLocalLiveness(
240
    BlockLifetimeInfo &livenessInfo,
241
33.0k
    BasicBlock *BB) {
242
  // For each instruction in the block:
243
2.89M
  for (auto &it : *BB) {
244
2.89M
    Instruction *I = &it;
245
246
2.89M
    unsigned idx = getInstructionNumber(I);
247
2.89M
    livenessInfo.kill_.set(idx);
248
249
    // PHI nodes require special handling because they are flow sensitive. Mask
250
    // out flow that does not go in the direction of the phi edge.
251
2.89M
    if (auto *P = llvh::dyn_cast<PhiInst>(I)) {
252
16
      llvh::SmallVector<unsigned, 4> incomingValueNum;
253
254
      // Collect all incoming value numbers.
255
48
      for (int i = 0, e = P->getNumEntries(); i < e; i++) {
256
32
        auto E = P->getEntry(i);
257
        // Skip unreachable predecessors.
258
32
        if (!blockLiveness_.count(E.second))
259
0
          continue;
260
32
        if (auto *II = llvh::dyn_cast<Instruction>(E.first)) {
261
32
          incomingValueNum.push_back(getInstructionNumber(II));
262
32
        }
263
32
      }
264
265
      // Block the incoming values from flowing into the predecessors.
266
48
      for (int i = 0, e = P->getNumEntries(); i < e; i++) {
267
32
        auto E = P->getEntry(i);
268
        // Skip unreachable predecessors.
269
32
        if (!blockLiveness_.count(E.second))
270
0
          continue;
271
64
        for (auto num : incomingValueNum) {
272
64
          blockLiveness_[E.second].maskIn_.set(num);
273
64
        }
274
32
      }
275
276
      // Allow the flow of incoming values in specific directions:
277
48
      for (int i = 0, e = P->getNumEntries(); i < e; i++) {
278
32
        auto E = P->getEntry(i);
279
        // Skip unreachable predecessors.
280
32
        if (!blockLiveness_.count(E.second))
281
0
          continue;
282
32
        if (auto *II = llvh::dyn_cast<Instruction>(E.first)) {
283
32
          unsigned idxII = getInstructionNumber(II);
284
32
          blockLiveness_[E.second].maskIn_.reset(idxII);
285
32
        }
286
32
      }
287
16
    }
288
289
    // For each one of the operands that are also instructions:
290
9.75M
    for (unsigned opIdx = 0, e = I->getNumOperands(); opIdx != e; ++opIdx) {
291
6.85M
      auto *opInst = llvh::dyn_cast<Instruction>(I->getOperand(opIdx));
292
6.85M
      if (!opInst)
293
3.83M
        continue;
294
      // Skip instructions from unreachable blocks.
295
3.02M
      if (!blockLiveness_.count(opInst->getParent()))
296
0
        continue;
297
298
      // Get the index of the operand.
299
3.02M
      auto opInstIdx = getInstructionNumber(opInst);
300
3.02M
      livenessInfo.gen_.set(opInstIdx);
301
3.02M
    }
302
2.89M
  }
303
33.0k
}
304
305
#ifndef NDEBUG
306
static void dumpVector(
307
    const BitVector &bv,
308
    llvh::StringRef text,
309
0
    llvh::raw_ostream &ost = llvh::errs()) {
310
0
  ost << text;
311
0
  for (unsigned i = 0; i < bv.size(); i++) {
312
0
    ost << bv.test(i);
313
0
  }
314
0
  ost << "\n";
315
0
}
316
#endif
317
318
350
void RegisterAllocator::calculateGlobalLiveness(ArrayRef<BasicBlock *> order) {
319
350
  unsigned iterations = 0;
320
350
  bool changed = false;
321
322
  // Init the live-in vector to be GEN-KILL.
323
33.0k
  for (auto &it : blockLiveness_) {
324
33.0k
    BlockLifetimeInfo &livenessInfo = it.second;
325
33.0k
    livenessInfo.liveIn_ |= livenessInfo.gen_;
326
33.0k
    livenessInfo.liveIn_.reset(livenessInfo.kill_);
327
33.0k
    livenessInfo.liveIn_.reset(livenessInfo.maskIn_);
328
33.0k
  }
329
330
657
  do {
331
657
    iterations++;
332
657
    changed = false;
333
334
66.6k
    for (auto it = order.rbegin(), e = order.rend(); it != e; it++) {
335
66.0k
      BasicBlock *BB = *it;
336
66.0k
      BlockLifetimeInfo &livenessInfo = blockLiveness_[BB];
337
338
      // Rule:  OUT = SUCC0_in  + SUCC1_in ...
339
98.0k
      for (auto *succ : successors(BB)) {
340
98.0k
        BlockLifetimeInfo &succInfo = blockLiveness_[succ];
341
        // Check if we are about to change bits in the live-out vector.
342
98.0k
        if (succInfo.liveIn_.test(livenessInfo.liveOut_)) {
343
32.7k
          changed = true;
344
32.7k
        }
345
346
98.0k
        livenessInfo.liveOut_ |= succInfo.liveIn_;
347
98.0k
      }
348
349
      // Rule: In = gen + (OUT - KILL)
350
      // After initializing 'in' to 'gen' - 'kill', the only way for the result
351
      // to change is for 'out' to change, and we check if 'out' changes above.
352
66.0k
      livenessInfo.liveIn_ = livenessInfo.liveOut_;
353
66.0k
      livenessInfo.liveIn_ |= livenessInfo.gen_;
354
66.0k
      livenessInfo.liveIn_.reset(livenessInfo.kill_);
355
66.0k
      livenessInfo.liveIn_.reset(livenessInfo.maskIn_);
356
66.0k
    }
357
657
  } while (changed);
358
359
350
#ifndef NDEBUG
360
33.0k
  for (auto &it : blockLiveness_) {
361
33.0k
    BasicBlock *BB = it.first;
362
33.0k
    BlockLifetimeInfo &livenessInfo = it.second;
363
33.0k
    LLVM_DEBUG(llvh::dbgs() << "Block " << BB << "\n");
364
33.0k
    LLVM_DEBUG(dumpVector(livenessInfo.gen_, "gen     ", llvh::dbgs()));
365
33.0k
    LLVM_DEBUG(dumpVector(livenessInfo.kill_, "kill    ", llvh::dbgs()));
366
33.0k
    LLVM_DEBUG(dumpVector(livenessInfo.liveIn_, "liveIn  ", llvh::dbgs()));
367
33.0k
    LLVM_DEBUG(dumpVector(livenessInfo.liveOut_, "liveOut ", llvh::dbgs()));
368
33.0k
    LLVM_DEBUG(dumpVector(livenessInfo.maskIn_, "maskIn  ", llvh::dbgs()));
369
33.0k
    LLVM_DEBUG(llvh::dbgs() << "------\n");
370
33.0k
  }
371
350
#endif
372
373
350
  LLVM_DEBUG(
374
350
      dbgs() << "Completed liveness in " << iterations << " iterations\n");
375
  // Suppress -Wunused-but-set-variable warning with new compilers.
376
350
  (void)iterations;
377
350
}
378
379
0
Interval &RegisterAllocator::getInstructionInterval(Instruction *I) {
380
0
  auto idx = getInstructionNumber(I);
381
0
  return instructionInterval_[idx];
382
0
}
383
384
Register RegisterAllocator::getRegisterForInstructionAt(
385
    Instruction *Value,
386
0
    Instruction *At) {
387
0
  if (hasInstructionNumber(Value) && hasInstructionNumber(At)) {
388
0
    Register valueReg = getRegister(Value);
389
0
    unsigned loc = getInstructionNumber(At);
390
0
    if (valueReg.isValid()) {
391
      // Check all of valueReg's intervals, and see if any of those contain loc.
392
0
      const Interval &i = getInstructionInterval(Value);
393
0
      auto itBegin = i.segments_.begin(), itEnd = i.segments_.end();
394
0
      if (std::find_if(itBegin, itEnd, [loc](const Segment &s) {
395
0
            return s.contains(loc);
396
0
          })) {
397
0
        return valueReg;
398
0
      }
399
0
    }
400
0
  }
401
402
  // Value is not available at At; return an invalid register.
403
0
  return Register{};
404
0
}
405
406
40
bool RegisterAllocator::isManuallyAllocatedInterval(Instruction *I) {
407
40
  if (hasTargetSpecificLowering(I))
408
0
    return true;
409
410
40
  for (auto *U : I->getUsers()) {
411
40
    if (hasTargetSpecificLowering(U))
412
0
      return true;
413
40
  }
414
415
40
  return false;
416
40
}
417
418
void RegisterAllocator::coalesce(
419
    DenseMap<Instruction *, Instruction *> &map,
420
350
    ArrayRef<BasicBlock *> order) {
421
  // Merge all PHI nodes into a single interval. This part is required for
422
  // correctness because it bounds the MOV and the PHIs into a single interval.
423
33.0k
  for (BasicBlock *BB : order) {
424
2.89M
    for (Instruction &I : *BB) {
425
2.89M
      auto *P = llvh::dyn_cast<PhiInst>(&I);
426
2.89M
      if (!P)
427
2.89M
        continue;
428
429
16
      unsigned phiNum = getInstructionNumber(P);
430
48
      for (unsigned i = 0, e = P->getNumEntries(); i < e; ++i) {
431
32
        auto *mov = cast<MovInst>(P->getEntry(i).first);
432
433
        // Bail out if the interval is already mapped, like in the case of self
434
        // edges.
435
32
        if (map.count(mov))
436
0
          continue;
437
438
32
        if (!hasInstructionNumber(mov))
439
0
          continue;
440
441
32
        unsigned idx = getInstructionNumber(mov);
442
32
        instructionInterval_[phiNum].add(instructionInterval_[idx]);
443
444
        // Record the fact that the mov should use the same register as the phi.
445
32
        map[mov] = P;
446
32
      }
447
16
    }
448
33.0k
  }
449
450
  // Given a sequence of MOVs that was generated by the PHI lowering, try to
451
  // shorten the lifetime of intervals by reusing copies. For example, we
452
  // shorten the lifetime of %0 by making %2 use %1.
453
  // %1 = MovInst %0
454
  // %2 = MovInst %0
455
33.0k
  for (BasicBlock *BB : order) {
456
33.0k
    DenseMap<Value *, MovInst *> lastCopy;
457
458
2.89M
    for (Instruction &I : *BB) {
459
2.89M
      auto *mov = llvh::dyn_cast<MovInst>(&I);
460
2.89M
      if (!mov)
461
2.89M
        continue;
462
463
40
      Value *op = mov->getSingleOperand();
464
40
      if (llvh::isa<Literal>(op))
465
0
        continue;
466
467
      // If we've made a copy inside this basic block then use the copy.
468
40
      auto it = lastCopy.find(op);
469
40
      if (it != lastCopy.end()) {
470
0
        mov->setOperand(it->second, 0);
471
0
      }
472
473
40
      lastCopy[op] = mov;
474
40
    }
475
33.0k
  }
476
477
  // Optimize the program by coalescing multiple live intervals into a single
478
  // long interval. This phase is optional.
479
33.0k
  for (BasicBlock *BB : order) {
480
2.89M
    for (Instruction &I : *BB) {
481
2.89M
      auto *mov = llvh::dyn_cast<MovInst>(&I);
482
2.89M
      if (!mov)
483
2.89M
        continue;
484
485
40
      auto *op = llvh::dyn_cast<Instruction>(mov->getSingleOperand());
486
40
      if (!op)
487
0
        continue;
488
489
      // Don't coalesce intervals that are already coalesced to other intervals
490
      // or that there are other intervals that are coalesced into it, or if
491
      // the interval is pre-allocated.
492
40
      if (map.count(op) || isAllocated(op) || isAllocated(mov))
493
0
        continue;
494
495
      // If the MOV is already coalesced into some other interval then merge the
496
      // operand into that interval.
497
40
      Instruction *dest = mov;
498
499
      // Don't handle instructions with target specific lowering because this
500
      // means that we won't release them (and call the target specific hook)
501
      // until the whole register is freed.
502
40
      if (isManuallyAllocatedInterval(op))
503
0
        continue;
504
505
      // If the mov is already merged into another interval then find the
506
      // destination interval and try to merge the current interval into it.
507
72
      while (map.count(dest)) {
508
32
        dest = map[dest];
509
32
      }
510
511
40
      unsigned destIdx = getInstructionNumber(dest);
512
40
      unsigned opIdx = getInstructionNumber(op);
513
40
      Interval &destIvl = instructionInterval_[destIdx];
514
40
      Interval &opIvl = instructionInterval_[opIdx];
515
516
40
      if (destIvl.intersects(opIvl))
517
16
        continue;
518
519
24
      LLVM_DEBUG(
520
24
          dbgs() << "Coalescing instruction @" << opIdx << "  " << opIvl
521
24
                 << " -> @" << destIdx << "  " << destIvl << "\n");
522
523
252
      for (auto &it : map) {
524
252
        if (it.second == op) {
525
24
          LLVM_DEBUG(
526
24
              dbgs() << "Remapping @" << getInstructionNumber(it.first)
527
24
                     << " from @" << opIdx << " to @" << destIdx << "\n");
528
24
          it.second = dest;
529
24
        }
530
252
      }
531
532
24
      instructionInterval_[destIdx].add(opIvl);
533
24
      map[op] = dest;
534
24
    }
535
33.0k
  }
536
350
}
537
538
102k
void RegisterAllocator::allocateFastPass(ArrayRef<BasicBlock *> order) {
539
  // Make sure Phis and related Movs get the same register
540
707k
  for (auto *bb : order) {
541
3.09M
    for (auto &inst : *bb) {
542
3.09M
      handleInstruction(&inst);
543
3.09M
      if (auto *phi = llvh::dyn_cast<PhiInst>(&inst)) {
544
4.37k
        auto reg = file.allocateRegister();
545
4.37k
        updateRegister(phi, reg);
546
13.1k
        for (int i = 0, e = phi->getNumEntries(); i < e; i++) {
547
8.75k
          updateRegister(phi->getEntry(i).first, reg);
548
8.75k
        }
549
4.37k
      }
550
3.09M
    }
551
707k
  }
552
553
  // Bit vector indicating whether a register with a given index is being used
554
  // as a block local register.
555
102k
  llvh::BitVector blockLocals;
556
557
  // List of free block local registers. We have to maintain this outside the
558
  // file because we cannot determine interference between local and global
559
  // registers. So we have to ensure that the local registers are only reused
560
  // for other block-local instructions.
561
102k
  llvh::SmallVector<Register, 8> freeBlockLocals;
562
563
  // A dummy register used for all instructions that have no users.
564
102k
  Register deadReg = file.allocateRegister();
565
566
  // Iterate in reverse, so we can cheaply determine whether an instruction
567
  // is local, and assign it a register accordingly.
568
707k
  for (auto *bb : llvh::reverse(order)) {
569
3.09M
    for (auto &inst : llvh::reverse(*bb)) {
570
3.09M
      if (isAllocated(&inst)) {
571
        // If this is using a local register, we know the register is free after
572
        // we visit the definition.
573
1.70M
        auto reg = getRegister(&inst);
574
1.70M
        auto idx = reg.getIndex();
575
1.70M
        if (idx < blockLocals.size() && blockLocals.test(idx))
576
1.45M
          freeBlockLocals.push_back(reg);
577
1.70M
      } else {
578
        // Unallocated instruction means the result is dead, because all users
579
        // are visited first. Allocate a temporary register.
580
        // Note that we cannot assert that the instruction has no users, because
581
        // there may be users in dead blocks.
582
1.38M
        updateRegister(&inst, deadReg);
583
1.38M
      }
584
585
      // Allocate a register to unallocated operands.
586
7.29M
      for (size_t i = 0, e = inst.getNumOperands(); i < e; ++i) {
587
4.20M
        auto *op = llvh::dyn_cast<Instruction>(inst.getOperand(i));
588
589
        // Skip if op is not an instruction or already has a register.
590
4.20M
        if (!op || isAllocated(op))
591
2.61M
          continue;
592
593
1.59M
        if (op->getParent() != bb) {
594
          // Live across blocks, allocate a global regigster.
595
135k
          updateRegister(op, file.allocateRegister());
596
135k
          continue;
597
135k
        }
598
599
        // We know this operand is local because:
600
        // 1. The operand is in the same block as this one.
601
        // 2. All blocks dominated by this block have been visited.
602
        // 3. All users must be dominated by their def, since Phis are
603
        //    allocated beforehand.
604
1.45M
        if (!freeBlockLocals.empty()) {
605
1.35M
          updateRegister(op, freeBlockLocals.pop_back_val());
606
1.35M
          continue;
607
1.35M
        }
608
609
        // No free local register, allocate another one.
610
104k
        Register reg = file.allocateRegister();
611
104k
        if (blockLocals.size() <= reg.getIndex())
612
104k
          blockLocals.resize(reg.getIndex() + 1);
613
104k
        blockLocals.set(reg.getIndex());
614
104k
        updateRegister(op, reg);
615
104k
      }
616
3.09M
    }
617
707k
  }
618
102k
}
619
620
103k
void RegisterAllocator::allocate(ArrayRef<BasicBlock *> order) {
621
103k
  PerfSection regAlloc("Register Allocation");
622
623
  // Lower PHI nodes into a sequence of MOVs.
624
103k
  lowerPhis(order);
625
626
103k
  {
627
    // We have two forms of register allocation: classic and fast pass.
628
    // Classic allocation calculates and merges liveness intervals, fast pass
629
    // just assigns sequentually. The fast pass can be enabled for small
630
    // functions where runtime memory savings will be small, and for large
631
    // functions where degenerate behavior can inflate compile time memory
632
    // usage.
633
634
103k
    unsigned int instructionCount = 0;
635
740k
    for (const auto &BB : order) {
636
740k
      instructionCount += BB->getInstList().size();
637
740k
    }
638
    // We allocate five bits per instruction per basicblock in our liveness
639
    // sets.
640
103k
    uint64_t estimatedMemoryUsage =
641
103k
        (uint64_t)order.size() * instructionCount * 5 / 8;
642
103k
    if (instructionCount < fastPassThreshold ||
643
103k
        estimatedMemoryUsage > memoryLimit) {
644
102k
      allocateFastPass(order);
645
102k
      return;
646
102k
    }
647
103k
  }
648
649
  // Number instructions:
650
33.0k
  for (auto *BB : order) {
651
2.89M
    for (auto &it : *BB) {
652
2.89M
      Instruction *I = &it;
653
2.89M
      auto idx = getInstructionNumber(I);
654
2.89M
      (void)idx;
655
2.89M
      assert(idx == getInstructionNumber(I) && "Invalid numbering");
656
2.89M
    }
657
33.0k
  }
658
659
  // Init the basic block liveness data structure and calculate the local
660
  // liveness for each basic block.
661
350
  unsigned maxIdx = getMaxInstrIndex();
662
33.0k
  for (auto *BB : order) {
663
33.0k
    blockLiveness_[BB].init(maxIdx);
664
33.0k
  }
665
33.0k
  for (auto *BB : order) {
666
33.0k
    calculateLocalLiveness(blockLiveness_[BB], BB);
667
33.0k
  }
668
669
  // Propagate the local liveness information across the whole function.
670
350
  calculateGlobalLiveness(order);
671
672
  // Calculate the live intervals for each instruction.
673
350
  calculateLiveIntervals(order);
674
675
  // Free the memory used for liveness.
676
350
  blockLiveness_.clear();
677
678
  // Maps coalesced instructions. First uses the register allocated for Second.
679
350
  DenseMap<Instruction *, Instruction *> coalesced;
680
681
350
  coalesce(coalesced, order);
682
683
  // Compare two intervals and return the one that starts first.
684
15.8M
  auto startsFirst = [&](unsigned a, unsigned b) {
685
15.8M
    Interval &IA = instructionInterval_[a];
686
15.8M
    Interval &IB = instructionInterval_[b];
687
15.8M
    return IA.start() < IB.start() || (IA.start() == IB.start() && a < b);
688
15.8M
  };
689
690
  // Compare two intervals and return the one that starts first. If two
691
  // intervals end at the same place, schedule the instruction before the
692
  // operands.
693
694
78.9M
  auto endsFirst = [&](unsigned a, unsigned b) {
695
78.9M
    auto &aInterval = instructionInterval_[a];
696
78.9M
    auto &bInterval = instructionInterval_[b];
697
78.9M
    if (bInterval.end() == aInterval.end()) {
698
17.5M
      return bInterval.start() > aInterval.start() ||
699
17.5M
          (bInterval.start() == aInterval.start() && b > a);
700
17.5M
    }
701
61.3M
    return bInterval.end() > aInterval.end();
702
78.9M
  };
703
704
350
  using InstList = llvh::SmallVector<unsigned, 32>;
705
706
350
  std::priority_queue<unsigned, InstList, decltype(endsFirst)> intervals(
707
350
      endsFirst);
708
709
2.89M
  for (int i = 0, e = getMaxInstrIndex(); i < e; i++) {
710
2.89M
    intervals.push(i);
711
2.89M
  }
712
713
350
  std::priority_queue<unsigned, InstList, decltype(startsFirst)>
714
350
      liveIntervalsQueue(startsFirst);
715
716
  // Perform the register allocation:
717
2.89M
  while (!intervals.empty()) {
718
2.89M
    unsigned instIdx = intervals.top();
719
2.89M
    intervals.pop();
720
2.89M
    Instruction *inst = instructionsByNumbers_[instIdx];
721
2.89M
    Interval &instInterval = instructionInterval_[instIdx];
722
2.89M
    unsigned currentIndex = instInterval.end();
723
724
2.89M
    LLVM_DEBUG(
725
2.89M
        dbgs() << "Looking at index " << currentIndex << ": " << instInterval
726
2.89M
               << " " << inst->getName() << "\n");
727
728
    // Free all of the intervals that start after the current index.
729
5.79M
    while (!liveIntervalsQueue.empty()) {
730
5.79M
      LLVM_DEBUG(
731
5.79M
          dbgs() << "\t Cleaning up for index " << currentIndex << " PQ("
732
5.79M
                 << liveIntervalsQueue.size() << ")\n");
733
734
5.79M
      unsigned topIdx = liveIntervalsQueue.top();
735
5.79M
      Interval &range = instructionInterval_[topIdx];
736
5.79M
      LLVM_DEBUG(dbgs() << "\t Earliest interval: " << range << "\n");
737
738
      // Flush empty intervals and intervals that finished after our index.
739
5.79M
      bool nonEmptyInterval = range.size();
740
5.79M
      if (range.start() < currentIndex && nonEmptyInterval) {
741
2.89M
        break;
742
2.89M
      }
743
744
2.89M
      liveIntervalsQueue.pop();
745
746
2.89M
      Instruction *I = instructionsByNumbers_[topIdx];
747
2.89M
      Register R = getRegister(I);
748
2.89M
      LLVM_DEBUG(
749
2.89M
          dbgs() << "\t Reached idx #" << currentIndex << " deleting inverval "
750
2.89M
                 << range << " that's allocated to register " << R
751
2.89M
                 << " used by instruction " << I->getName() << "\n");
752
2.89M
      file.killRegister(R);
753
754
2.89M
      handleInstruction(I);
755
2.89M
    }
756
757
    // Don't try to allocate registers that were merged into other live
758
    // intervals.
759
2.89M
    if (coalesced.count(inst)) {
760
56
      continue;
761
56
    }
762
763
    // Allocate a register for the live interval that we are currently handling.
764
2.89M
    if (!isAllocated(inst)) {
765
2.89M
      Register R = file.allocateRegister();
766
2.89M
      updateRegister(inst, R);
767
2.89M
    }
768
769
    // Mark the current instruction as live and remember to perform target
770
    // specific calls when we are done with the bundle.
771
2.89M
    liveIntervalsQueue.push(instIdx);
772
2.89M
  } // For each instruction in the function.
773
774
  // Free the remaining intervals.
775
1.01k
  while (!liveIntervalsQueue.empty()) {
776
662
    Instruction *I = instructionsByNumbers_[liveIntervalsQueue.top()];
777
662
    LLVM_DEBUG(
778
662
        dbgs() << "Free register used by instruction " << I->getName() << "\n");
779
662
    file.killRegister(getRegister(I));
780
662
    handleInstruction(I);
781
662
    liveIntervalsQueue.pop();
782
662
  }
783
784
  // Allocate registers for the coalesced registers.
785
350
  for (auto &RP : coalesced) {
786
56
    assert(!isAllocated(RP.first) && "Register should not be allocated");
787
56
    Instruction *dest = RP.second;
788
56
    updateRegister(RP.first, getRegister(dest));
789
56
  }
790
350
}
791
792
350
void RegisterAllocator::calculateLiveIntervals(ArrayRef<BasicBlock *> order) {
793
  /// Calculate the live intervals for each instruction. Start with a list of
794
  /// intervals that only contain the instruction itself.
795
2.89M
  for (int i = 0, e = instructionsByNumbers_.size(); i < e; ++i) {
796
    // The instructions are ordered consecutively. The start offset of the
797
    // instruction is the index in the array plus one because the value starts
798
    // to live on the next instruction.
799
2.89M
    instructionInterval_[i] = Interval(i + 1, i + 1);
800
2.89M
  }
801
802
  // For each basic block in the liveness map:
803
33.0k
  for (BasicBlock *BB : order) {
804
33.0k
    BlockLifetimeInfo &liveness = blockLiveness_[BB];
805
806
33.0k
    auto startOffset = getInstructionNumber(&*BB->begin());
807
33.0k
    auto endOffset = getInstructionNumber(BB->getTerminator());
808
809
    // Register fly-through basic blocks (basic blocks where the value enters)
810
    // and leavs without doing anything to any of the operands.
811
29.5M
    for (int i = 0, e = liveness.liveOut_.size(); i < e; i++) {
812
29.5M
      bool leavs = liveness.liveOut_.test(i);
813
29.5M
      bool enters = liveness.liveIn_.test(i);
814
29.5M
      if (leavs && enters) {
815
902k
        instructionInterval_[i].add(Segment(startOffset, endOffset + 1));
816
902k
      }
817
29.5M
    }
818
819
    // For each instruction in the block:
820
2.89M
    for (auto &it : *BB) {
821
2.89M
      auto instOffset = getInstructionNumber(&it);
822
      // The instruction is defined in this basic block. Check if it is leaving
823
      // the basic block extend the interval until the end of the block.
824
2.89M
      if (liveness.liveOut_.test(instOffset)) {
825
16.4k
        instructionInterval_[instOffset].add(
826
16.4k
            Segment(instOffset + 1, endOffset + 1));
827
16.4k
        assert(
828
16.4k
            !liveness.liveIn_.test(instOffset) &&
829
16.4k
            "Livein but also killed in this block?");
830
16.4k
      }
831
832
      // Extend the lifetime of the operands.
833
9.75M
      for (int i = 0, e = it.getNumOperands(); i < e; i++) {
834
6.85M
        auto instOp = llvh::dyn_cast<Instruction>(it.getOperand(i));
835
6.85M
        if (!instOp)
836
3.83M
          continue;
837
838
3.02M
        if (!hasInstructionNumber(instOp)) {
839
0
          assert(
840
0
              llvh::isa<PhiInst>(&it) &&
841
0
              "Only PhiInst should reference values from dead code");
842
0
          continue;
843
0
        }
844
845
3.02M
        auto operandIdx = getInstructionNumber(instOp);
846
        // Extend the lifetime of the interval to reach this instruction.
847
        // Include this instruction in the interval in order to make sure that
848
        // the register is not freed before the use.
849
850
3.02M
        auto start = operandIdx + 1;
851
3.02M
        auto end = instOffset + 1;
852
3.02M
        if (start < end) {
853
3.02M
          auto seg = Segment(operandIdx + 1, instOffset + 1);
854
3.02M
          instructionInterval_[operandIdx].add(seg);
855
3.02M
        }
856
3.02M
      }
857
858
      // Extend the lifetime of the PHI to include the source basic blocks.
859
2.89M
      if (auto *P = llvh::dyn_cast<PhiInst>(&it)) {
860
48
        for (int i = 0, e = P->getNumEntries(); i < e; i++) {
861
32
          auto E = P->getEntry(i);
862
          // PhiInsts may reference instructions from dead code blocks
863
          // (which will be unnumbered and unallocated). Since the edge
864
          // is necessarily also dead, we can just skip it.
865
32
          if (!hasInstructionNumber(E.second->getTerminator()))
866
0
            continue;
867
868
32
          unsigned termIdx = getInstructionNumber(E.second->getTerminator());
869
32
          Segment S(termIdx, termIdx + 1);
870
32
          instructionInterval_[instOffset].add(S);
871
872
          // Extend the lifetime of the predecessor to the end of the BB.
873
32
          if (auto *instOp = llvh::dyn_cast<Instruction>(E.first)) {
874
32
            auto predIdx = getInstructionNumber(instOp);
875
32
            auto S2 = Segment(predIdx + 1, termIdx);
876
32
            instructionInterval_[predIdx].add(S2);
877
32
          }
878
32
        } // each pred.
879
16
      }
880
881
2.89M
    } // for each instruction in the block.
882
33.0k
  } // for each block.
883
350
}
884
885
struct LivenessRegAllocIRPrinter : IRPrinter {
886
  RegisterAllocator &allocator;
887
888
  explicit LivenessRegAllocIRPrinter(
889
      RegisterAllocator &RA,
890
      llvh::raw_ostream &ost,
891
      bool escape = false)
892
0
      : IRPrinter(RA.getContext(), ost, escape), allocator(RA) {}
893
894
0
  void printInstructionDestination(Instruction *I) override {
895
0
    if (!allocator.isAllocated(I)) {
896
0
      os << "$??? ";
897
0
    } else {
898
0
      os << "$" << allocator.getRegister(I) << " ";
899
0
    }
900
901
0
    if (allocator.hasInstructionNumber(I)) {
902
0
      auto idx = allocator.getInstructionNumber(I);
903
0
      Interval &ivl = allocator.getInstructionInterval(I);
904
0
      os << "@" << idx << " " << ivl << "\t";
905
0
    } else {
906
0
      os << "          \t";
907
0
    }
908
909
0
    IRPrinter::printInstructionDestination(I);
910
0
  }
911
912
0
  void printValueLabel(Instruction *I, Value *V, unsigned opIndex) override {
913
0
    IRPrinter::printValueLabel(I, V, opIndex);
914
0
    auto codeGenOpts = I->getContext().getCodeGenerationSettings();
915
0
    if (codeGenOpts.dumpOperandRegisters && allocator.isAllocated(V)) {
916
0
      os << " @ $" << allocator.getRegister(V);
917
0
    }
918
0
  }
919
};
920
921
0
void RegisterAllocator::dump() {
922
0
  LivenessRegAllocIRPrinter Printer(*this, llvh::outs());
923
0
  Printer.visitFunction(*F);
924
0
}
925
926
37.7M
Register RegisterAllocator::getRegister(Value *I) {
927
37.7M
  assert(isAllocated(I) && "Instruction is not allocated!");
928
37.7M
  return allocated[I];
929
37.7M
}
930
931
14.7M
void RegisterAllocator::updateRegister(Value *I, Register R) {
932
14.7M
  allocated[I] = R;
933
14.7M
}
934
935
63.4M
bool RegisterAllocator::isAllocated(Value *I) {
936
63.4M
  return allocated.count(I);
937
63.4M
}
938
939
0
Register RegisterAllocator::reserve(unsigned count) {
940
0
  return file.tailAllocateConsecutive(count);
941
0
}
942
943
103k
Register RegisterAllocator::reserve(ArrayRef<Value *> values) {
944
103k
  assert(!values.empty() && "Can't reserve zero registers");
945
103k
  Register first = file.tailAllocateConsecutive(values.size());
946
947
103k
  Register T = first;
948
103k
  for (auto *v : values) {
949
103k
    if (v)
950
103k
      allocated[v] = T;
951
103k
    T = T.getConsecutive();
952
103k
  }
953
954
103k
  return first;
955
103k
}
956
957
3.02M
bool RegisterAllocator::hasInstructionNumber(Instruction *I) {
958
3.02M
  return instructionNumbers_.count(I);
959
3.02M
}
960
961
17.7M
unsigned RegisterAllocator::getInstructionNumber(Instruction *I) {
962
17.7M
  auto it = instructionNumbers_.find(I);
963
17.7M
  if (it != instructionNumbers_.end()) {
964
14.8M
    return it->second;
965
14.8M
  }
966
967
2.89M
  instructionsByNumbers_.push_back(I);
968
2.89M
  instructionInterval_.push_back(Interval());
969
970
2.89M
  unsigned newIdx = instructionsByNumbers_.size() - 1;
971
2.89M
  instructionNumbers_[I] = newIdx;
972
2.89M
  return newIdx;
973
17.7M
}
974
975
0
unsigned llvh::DenseMapInfo<Register>::getHashValue(Register Val) {
976
0
  return Val.getIndex();
977
0
}
978
979
0
bool llvh::DenseMapInfo<Register>::isEqual(Register LHS, Register RHS) {
980
0
  return LHS.getIndex() == RHS.getIndex();
981
0
}
982
983
namespace {
984
4.28M
static bool preallocateScopeRegisters(const Context &c) {
985
4.28M
  return c.getDebugInfoSetting() == DebugInfoSetting::ALL;
986
4.28M
}
987
} // namespace
988
989
ScopeRegisterAnalysis::ScopeRegisterAnalysis(Function *F, RegisterAllocator &RA)
990
103k
    : RA_(RA) {
991
  // Initialize the ScopeDesc -> ScopeCreationInst map; if emitting full debug
992
  // info, scopeCreationInsts will be used to reserve/pre-allocate the
993
  // environment registers for scopes in F.
994
103k
  llvh::SmallVector<Value *, 4> scopeCreationInsts;
995
843k
  for (BasicBlock &BB : *F) {
996
6.18M
    for (Instruction &I : BB) {
997
6.18M
      if (llvh::isa<HBCResolveEnvironment>(I)) {
998
        // HBCResolveEnvironment instructions are used to fetch an environment
999
        // outside of F. The debugger doesn't need access to these "resolved"
1000
        // environment.
1001
0
        continue;
1002
6.18M
      } else if (auto *SCI = llvh::dyn_cast<ScopeCreationInst>(&I)) {
1003
103k
        assert(
1004
103k
            SCI->getCreatedScopeDesc()->getFunction() == F &&
1005
103k
            "ScopeCreationInst that is creating a scope of another function");
1006
103k
        scopeCreationInsts_[SCI->getCreatedScopeDesc()] = SCI;
1007
103k
        if (preallocateScopeRegisters(F->getContext())) {
1008
103k
          LLVM_DEBUG(
1009
103k
              llvh::dbgs() << "Reserving register for ScopeCreationInst in "
1010
103k
                           << F->getOriginalOrInferredName() << "\n");
1011
103k
          scopeCreationInsts.push_back(SCI);
1012
103k
        }
1013
103k
      }
1014
6.18M
    }
1015
843k
  }
1016
1017
103k
  if (!scopeCreationInsts.empty()) {
1018
103k
    RA_.reserve(scopeCreationInsts);
1019
103k
  }
1020
103k
}
1021
1022
std::pair<Register, ScopeDesc *> ScopeRegisterAnalysis::registerAndScopeAt(
1023
    Instruction *Value,
1024
0
    ScopeCreationInst *SCI) {
1025
  // If SCI's value is available in a register, ensure that the register is
1026
  // holding SCI's result at loc.
1027
0
  Register sciReg = RA_.getRegisterForInstructionAt(Value, SCI);
1028
0
  if (sciReg.isValid()) {
1029
0
    return std::make_pair(sciReg, SCI->getCreatedScopeDesc());
1030
0
  }
1031
1032
  // sciReg is not alive at Value, so try to see if SCI's parent scope is.
1033
0
  auto parentIt =
1034
0
      scopeCreationInsts_.find(SCI->getCreatedScopeDesc()->getParent());
1035
0
  if (parentIt == scopeCreationInsts_.end()) {
1036
    // SCI's Parent scope is not available in any register.
1037
0
    return std::make_pair(Register{}, nullptr);
1038
0
  }
1039
1040
  // Try again, this time on SCI's parent Environment.
1041
0
  return registerAndScopeAt(Value, parentIt->second);
1042
0
}
1043
1044
std::pair<Register, ScopeDesc *>
1045
4.18M
ScopeRegisterAnalysis::registerAndScopeForInstruction(Instruction *Inst) {
1046
4.18M
  if (ScopeDesc *originalScope = Inst->getSourceLevelScope()) {
1047
4.18M
    auto sciIt = scopeCreationInsts_.find(originalScope);
1048
4.18M
    if (sciIt != scopeCreationInsts_.end()) {
1049
4.18M
      ScopeCreationInst *originalScopeCreation = sciIt->second;
1050
4.18M
      if (!preallocateScopeRegisters(Inst->getContext())) {
1051
0
        return registerAndScopeAt(Inst, originalScopeCreation);
1052
0
      }
1053
      // Use the pre-allocated registers. This is needed because RA_ could have
1054
      // decided to use fast allocation, in which case instructions won't be
1055
      // numbered (and intervals won't be computed), meaning registerAndScopeAt
1056
      // above will not find the scope register.
1057
4.18M
      assert(RA_.isAllocated(originalScopeCreation) && "should be allocated");
1058
4.18M
      Register sciReg = RA_.getRegister(originalScopeCreation);
1059
4.18M
      assert(sciReg.isValid() && "scope register should be valid.");
1060
4.18M
      return std::make_pair(sciReg, originalScope);
1061
4.18M
    }
1062
4.18M
  }
1063
1064
0
  return std::make_pair(Register{}, nullptr);
1065
4.18M
}
1066
1067
#undef DEBUG_TYPE