/src/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // \file |
10 | | // Uses profile information to split out cold blocks. |
11 | | // |
12 | | // This pass splits out cold machine basic blocks from the parent function. This |
13 | | // implementation leverages the basic block section framework. Blocks marked |
14 | | // cold by this pass are grouped together in a separate section prefixed with |
15 | | // ".text.unlikely.*". The linker can then group these together as a cold |
16 | | // section. The split part of the function is a contiguous region identified by |
17 | | // the symbol "foo.cold". Grouping all cold blocks across functions together |
18 | | // decreases fragmentation and improves icache and itlb utilization. Note that |
19 | | // the overall changes to the binary size are negligible; only a small number of |
20 | | // additional jump instructions may be introduced. |
21 | | // |
22 | | // For the original RFC of this pass please see |
23 | | // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ |
24 | | //===----------------------------------------------------------------------===// |
25 | | |
26 | | #include "llvm/ADT/SmallVector.h" |
27 | | #include "llvm/Analysis/BlockFrequencyInfo.h" |
28 | | #include "llvm/Analysis/BranchProbabilityInfo.h" |
29 | | #include "llvm/Analysis/EHUtils.h" |
30 | | #include "llvm/Analysis/ProfileSummaryInfo.h" |
31 | | #include "llvm/CodeGen/BasicBlockSectionUtils.h" |
32 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
33 | | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
34 | | #include "llvm/CodeGen/MachineFunction.h" |
35 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
36 | | #include "llvm/CodeGen/MachineModuleInfo.h" |
37 | | #include "llvm/CodeGen/Passes.h" |
38 | | #include "llvm/CodeGen/TargetInstrInfo.h" |
39 | | #include "llvm/IR/Function.h" |
40 | | #include "llvm/InitializePasses.h" |
41 | | #include "llvm/Support/CommandLine.h" |
42 | | #include <optional> |
43 | | |
44 | | using namespace llvm; |
45 | | |
46 | | // FIXME: This cutoff value is CPU dependent and should be moved to |
47 | | // TargetTransformInfo once we consider enabling this on other platforms. |
48 | | // The value is expressed as a ProfileSummaryInfo integer percentile cutoff. |
49 | | // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split. |
50 | | // The default was empirically determined to be optimal when considering cutoff |
51 | | // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on |
52 | | // Intel CPUs. |
53 | | static cl::opt<unsigned> |
54 | | PercentileCutoff("mfs-psi-cutoff", |
55 | | cl::desc("Percentile profile summary cutoff used to " |
56 | | "determine cold blocks. Unused if set to zero."), |
57 | | cl::init(999950), cl::Hidden); |
58 | | |
59 | | static cl::opt<unsigned> ColdCountThreshold( |
60 | | "mfs-count-threshold", |
61 | | cl::desc( |
62 | | "Minimum number of times a block must be executed to be retained."), |
63 | | cl::init(1), cl::Hidden); |
64 | | |
65 | | static cl::opt<bool> SplitAllEHCode( |
66 | | "mfs-split-ehcode", |
67 | | cl::desc("Splits all EH code and it's descendants by default."), |
68 | | cl::init(false), cl::Hidden); |
69 | | |
70 | | namespace { |
71 | | |
72 | | class MachineFunctionSplitter : public MachineFunctionPass { |
73 | | public: |
74 | | static char ID; |
75 | 0 | MachineFunctionSplitter() : MachineFunctionPass(ID) { |
76 | 0 | initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); |
77 | 0 | } |
78 | | |
79 | 0 | StringRef getPassName() const override { |
80 | 0 | return "Machine Function Splitter Transformation"; |
81 | 0 | } |
82 | | |
83 | | void getAnalysisUsage(AnalysisUsage &AU) const override; |
84 | | |
85 | | bool runOnMachineFunction(MachineFunction &F) override; |
86 | | }; |
87 | | } // end anonymous namespace |
88 | | |
89 | | /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable |
90 | | /// only by EH pad as cold. This will help mark EH pads statically cold |
91 | | /// instead of relying on profile data. |
92 | 0 | static void setDescendantEHBlocksCold(MachineFunction &MF) { |
93 | 0 | DenseSet<MachineBasicBlock *> EHBlocks; |
94 | 0 | computeEHOnlyBlocks(MF, EHBlocks); |
95 | 0 | for (auto Block : EHBlocks) { |
96 | 0 | Block->setSectionID(MBBSectionID::ColdSectionID); |
97 | 0 | } |
98 | 0 | } |
99 | | |
100 | 0 | static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) { |
101 | 0 | auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { |
102 | 0 | return X.getSectionID().Type < Y.getSectionID().Type; |
103 | 0 | }; |
104 | 0 | llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); |
105 | 0 | llvm::avoidZeroOffsetLandingPad(MF); |
106 | 0 | } |
107 | | |
108 | | static bool isColdBlock(const MachineBasicBlock &MBB, |
109 | | const MachineBlockFrequencyInfo *MBFI, |
110 | 0 | ProfileSummaryInfo *PSI) { |
111 | 0 | std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); |
112 | | |
113 | | // Temporary hack to cope with AArch64's jump table encoding |
114 | 0 | const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo(); |
115 | 0 | if (!TII.isMBBSafeToSplitToCold(MBB)) |
116 | 0 | return false; |
117 | | |
118 | | // For instrumentation profiles and sample profiles, we use different ways |
119 | | // to judge whether a block is cold and should be split. |
120 | 0 | if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) { |
121 | | // If using instrument profile, which is deemed "accurate", no count means |
122 | | // cold. |
123 | 0 | if (!Count) |
124 | 0 | return true; |
125 | 0 | if (PercentileCutoff > 0) |
126 | 0 | return PSI->isColdCountNthPercentile(PercentileCutoff, *Count); |
127 | | // Fallthrough to end of function. |
128 | 0 | } else if (PSI->hasSampleProfile()) { |
129 | | // For sample profile, no count means "do not judege coldness". |
130 | 0 | if (!Count) |
131 | 0 | return false; |
132 | 0 | } |
133 | | |
134 | 0 | return (*Count < ColdCountThreshold); |
135 | 0 | } |
136 | | |
137 | 0 | bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { |
138 | | // We target functions with profile data. Static information in the form |
139 | | // of exception handling code may be split to cold if user passes the |
140 | | // mfs-split-ehcode flag. |
141 | 0 | bool UseProfileData = MF.getFunction().hasProfileData(); |
142 | 0 | if (!UseProfileData && !SplitAllEHCode) |
143 | 0 | return false; |
144 | | |
145 | 0 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
146 | 0 | if (!TII.isFunctionSafeToSplit(MF)) |
147 | 0 | return false; |
148 | | |
149 | | // Renumbering blocks here preserves the order of the blocks as |
150 | | // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort |
151 | | // blocks. Preserving the order of blocks is essential to retaining decisions |
152 | | // made by prior passes such as MachineBlockPlacement. |
153 | 0 | MF.RenumberBlocks(); |
154 | 0 | MF.setBBSectionsType(BasicBlockSection::Preset); |
155 | |
|
156 | 0 | MachineBlockFrequencyInfo *MBFI = nullptr; |
157 | 0 | ProfileSummaryInfo *PSI = nullptr; |
158 | 0 | if (UseProfileData) { |
159 | 0 | MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); |
160 | 0 | PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
161 | | // If we don't have a good profile (sample profile is not deemed |
162 | | // as a "good profile") and the function is not hot, then early |
163 | | // return. (Because we can only trust hot functions when profile |
164 | | // quality is not good.) |
165 | 0 | if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) { |
166 | | // Split all EH code and it's descendant statically by default. |
167 | 0 | if (SplitAllEHCode) |
168 | 0 | setDescendantEHBlocksCold(MF); |
169 | 0 | finishAdjustingBasicBlocksAndLandingPads(MF); |
170 | 0 | return true; |
171 | 0 | } |
172 | 0 | } |
173 | | |
174 | 0 | SmallVector<MachineBasicBlock *, 2> LandingPads; |
175 | 0 | for (auto &MBB : MF) { |
176 | 0 | if (MBB.isEntryBlock()) |
177 | 0 | continue; |
178 | | |
179 | 0 | if (MBB.isEHPad()) |
180 | 0 | LandingPads.push_back(&MBB); |
181 | 0 | else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode) |
182 | 0 | MBB.setSectionID(MBBSectionID::ColdSectionID); |
183 | 0 | } |
184 | | |
185 | | // Split all EH code and it's descendant statically by default. |
186 | 0 | if (SplitAllEHCode) |
187 | 0 | setDescendantEHBlocksCold(MF); |
188 | | // We only split out eh pads if all of them are cold. |
189 | 0 | else { |
190 | | // Here we have UseProfileData == true. |
191 | 0 | bool HasHotLandingPads = false; |
192 | 0 | for (const MachineBasicBlock *LP : LandingPads) { |
193 | 0 | if (!isColdBlock(*LP, MBFI, PSI)) |
194 | 0 | HasHotLandingPads = true; |
195 | 0 | } |
196 | 0 | if (!HasHotLandingPads) { |
197 | 0 | for (MachineBasicBlock *LP : LandingPads) |
198 | 0 | LP->setSectionID(MBBSectionID::ColdSectionID); |
199 | 0 | } |
200 | 0 | } |
201 | |
|
202 | 0 | finishAdjustingBasicBlocksAndLandingPads(MF); |
203 | 0 | return true; |
204 | 0 | } |
205 | | |
206 | 0 | void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { |
207 | 0 | AU.addRequired<MachineModuleInfoWrapperPass>(); |
208 | 0 | AU.addRequired<MachineBlockFrequencyInfo>(); |
209 | 0 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
210 | 0 | } |
211 | | |
212 | | char MachineFunctionSplitter::ID = 0; |
213 | | INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", |
214 | | "Split machine functions using profile information", false, |
215 | | false) |
216 | | |
217 | 0 | MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { |
218 | 0 | return new MachineFunctionSplitter(); |
219 | 0 | } |