/src/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file contains the X86 memory folding tables. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "X86InstrFoldTables.h" |
14 | | #include "X86InstrInfo.h" |
15 | | #include "llvm/ADT/STLExtras.h" |
16 | | #include <atomic> |
17 | | #include <vector> |
18 | | |
19 | | using namespace llvm; |
20 | | |
21 | | // These tables are sorted by their RegOp value allowing them to be binary |
22 | | // searched at runtime without the need for additional storage. The enum values |
23 | | // are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which |
24 | | // makes sorting these tables a simple matter of alphabetizing the table. |
25 | | #include "X86GenFoldTables.inc" |
26 | | |
27 | | // Table to map instructions safe to broadcast using a different width from the |
28 | | // element width. |
29 | | static const X86FoldTableEntry BroadcastSizeTable2[] = { |
30 | | { X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS }, |
31 | | { X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS }, |
32 | | { X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS }, |
33 | | { X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD }, |
34 | | { X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD }, |
35 | | { X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD }, |
36 | | { X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS }, |
37 | | { X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS }, |
38 | | { X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS }, |
39 | | { X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD }, |
40 | | { X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD }, |
41 | | { X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD }, |
42 | | { X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS }, |
43 | | { X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS }, |
44 | | { X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS }, |
45 | | { X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD }, |
46 | | { X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD }, |
47 | | { X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD }, |
48 | | { X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q }, |
49 | | { X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q }, |
50 | | { X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q }, |
51 | | { X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q }, |
52 | | { X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q }, |
53 | | { X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q }, |
54 | | { X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D }, |
55 | | { X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D }, |
56 | | { X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D }, |
57 | | { X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D }, |
58 | | { X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D }, |
59 | | { X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D }, |
60 | | { X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q }, |
61 | | { X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q }, |
62 | | { X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q }, |
63 | | { X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D }, |
64 | | { X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D }, |
65 | | { X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D }, |
66 | | { X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q }, |
67 | | { X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q }, |
68 | | { X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q }, |
69 | | { X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D }, |
70 | | { X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D }, |
71 | | { X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D }, |
72 | | { X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS }, |
73 | | { X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS }, |
74 | | { X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS }, |
75 | | { X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD }, |
76 | | { X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD }, |
77 | | { X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD }, |
78 | | }; |
79 | | |
80 | | static const X86FoldTableEntry BroadcastSizeTable3[] = { |
81 | | { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q }, |
82 | | { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q }, |
83 | | { X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q }, |
84 | | { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D }, |
85 | | { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D }, |
86 | | { X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D }, |
87 | | }; |
88 | | |
89 | | static const X86FoldTableEntry * |
90 | 14.4k | lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) { |
91 | 14.4k | #ifndef NDEBUG |
92 | 14.4k | #define CHECK_SORTED_UNIQUE(TABLE) \ |
93 | 14.4k | assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted"); \ |
94 | 12 | assert(std::adjacent_find(std::begin(Table), std::end(Table)) == \ |
95 | 12 | std::end(Table) && \ |
96 | 12 | #TABLE " is not unique"); |
97 | | |
98 | | // Make sure the tables are sorted. |
99 | 14.4k | static std::atomic<bool> FoldTablesChecked(false); |
100 | 14.4k | if (!FoldTablesChecked.load(std::memory_order_relaxed)) { |
101 | 1 | CHECK_SORTED_UNIQUE(Table2Addr) |
102 | 1 | CHECK_SORTED_UNIQUE(Table0) |
103 | 1 | CHECK_SORTED_UNIQUE(Table1) |
104 | 1 | CHECK_SORTED_UNIQUE(Table2) |
105 | 1 | CHECK_SORTED_UNIQUE(Table3) |
106 | 1 | CHECK_SORTED_UNIQUE(Table4) |
107 | 1 | CHECK_SORTED_UNIQUE(BroadcastTable1) |
108 | 1 | CHECK_SORTED_UNIQUE(BroadcastTable2) |
109 | 1 | CHECK_SORTED_UNIQUE(BroadcastTable3) |
110 | 1 | CHECK_SORTED_UNIQUE(BroadcastTable4) |
111 | 1 | CHECK_SORTED_UNIQUE(BroadcastSizeTable2) |
112 | 1 | CHECK_SORTED_UNIQUE(BroadcastSizeTable3) |
113 | 0 | FoldTablesChecked.store(true, std::memory_order_relaxed); |
114 | 1 | } |
115 | 0 | #endif |
116 | | |
117 | 0 | const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp); |
118 | 14.4k | if (Data != Table.end() && Data->KeyOp == RegOp && |
119 | 14.4k | !(Data->Flags & TB_NO_FORWARD)) |
120 | 3.86k | return Data; |
121 | 10.6k | return nullptr; |
122 | 14.4k | } |
123 | | |
124 | | const X86FoldTableEntry * |
125 | 345 | llvm::lookupTwoAddrFoldTable(unsigned RegOp) { |
126 | 345 | return lookupFoldTableImpl(Table2Addr, RegOp); |
127 | 345 | } |
128 | | |
129 | | const X86FoldTableEntry * |
130 | 16.0k | llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) { |
131 | 16.0k | ArrayRef<X86FoldTableEntry> FoldTable; |
132 | 16.0k | if (OpNum == 0) |
133 | 4.14k | FoldTable = ArrayRef(Table0); |
134 | 11.9k | else if (OpNum == 1) |
135 | 6.60k | FoldTable = ArrayRef(Table1); |
136 | 5.33k | else if (OpNum == 2) |
137 | 1.53k | FoldTable = ArrayRef(Table2); |
138 | 3.80k | else if (OpNum == 3) |
139 | 1.08k | FoldTable = ArrayRef(Table3); |
140 | 2.72k | else if (OpNum == 4) |
141 | 781 | FoldTable = ArrayRef(Table4); |
142 | 1.94k | else |
143 | 1.94k | return nullptr; |
144 | | |
145 | 14.1k | return lookupFoldTableImpl(FoldTable, RegOp); |
146 | 16.0k | } |
147 | | |
148 | | namespace { |
149 | | |
150 | | // This class stores the memory unfolding tables. It is instantiated as a |
151 | | // function scope static variable to lazily init the unfolding table. |
152 | | struct X86MemUnfoldTable { |
153 | | // Stores memory unfolding tables entries sorted by opcode. |
154 | | std::vector<X86FoldTableEntry> Table; |
155 | | |
156 | 1 | X86MemUnfoldTable() { |
157 | 1 | for (const X86FoldTableEntry &Entry : Table2Addr) |
158 | | // Index 0, folded load and store, no alignment requirement. |
159 | 280 | addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); |
160 | | |
161 | 1 | for (const X86FoldTableEntry &Entry : Table0) |
162 | | // Index 0, mix of loads and stores. |
163 | 206 | addTableEntry(Entry, TB_INDEX_0); |
164 | | |
165 | 1 | for (const X86FoldTableEntry &Entry : Table1) |
166 | | // Index 1, folded load |
167 | 1.07k | addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD); |
168 | | |
169 | 1 | for (const X86FoldTableEntry &Entry : Table2) |
170 | | // Index 2, folded load |
171 | 2.05k | addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD); |
172 | | |
173 | 1 | for (const X86FoldTableEntry &Entry : Table3) |
174 | | // Index 3, folded load |
175 | 1.60k | addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD); |
176 | | |
177 | 1 | for (const X86FoldTableEntry &Entry : Table4) |
178 | | // Index 4, folded load |
179 | 1.12k | addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); |
180 | | |
181 | | // Broadcast tables. |
182 | 1 | for (const X86FoldTableEntry &Entry : BroadcastTable2) |
183 | | // Index 2, folded broadcast |
184 | 662 | addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); |
185 | | |
186 | 1 | for (const X86FoldTableEntry &Entry : BroadcastTable3) |
187 | | // Index 3, folded broadcast |
188 | 884 | addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); |
189 | | |
190 | 1 | for (const X86FoldTableEntry &Entry : BroadcastTable4) |
191 | | // Index 4, folded broadcast |
192 | 781 | addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); |
193 | | |
194 | | // Sort the memory->reg unfold table. |
195 | 1 | array_pod_sort(Table.begin(), Table.end()); |
196 | | |
197 | | // Now that it's sorted, ensure its unique. |
198 | 1 | assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() && |
199 | 1 | "Memory unfolding table is not unique!"); |
200 | 1 | } |
201 | | |
202 | | void addTableEntry(const X86FoldTableEntry &Entry, |
203 | 8.67k | uint16_t ExtraFlags) { |
204 | | // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it. |
205 | 8.67k | if ((Entry.Flags & TB_NO_REVERSE) == 0) |
206 | 7.50k | Table.push_back({Entry.DstOp, Entry.KeyOp, |
207 | 7.50k | static_cast<uint16_t>(Entry.Flags | ExtraFlags) }); |
208 | 8.67k | } |
209 | | }; |
210 | | } |
211 | | |
212 | | const X86FoldTableEntry * |
213 | 1.31k | llvm::lookupUnfoldTable(unsigned MemOp) { |
214 | 1.31k | static X86MemUnfoldTable MemUnfoldTable; |
215 | 1.31k | auto &Table = MemUnfoldTable.Table; |
216 | 1.31k | auto I = llvm::lower_bound(Table, MemOp); |
217 | 1.31k | if (I != Table.end() && I->KeyOp == MemOp) |
218 | 1.30k | return &*I; |
219 | 6 | return nullptr; |
220 | 1.31k | } |
221 | | |
222 | | namespace { |
223 | | |
224 | | // This class stores the memory -> broadcast folding tables. It is instantiated |
225 | | // as a function scope static variable to lazily init the folding table. |
226 | | struct X86BroadcastFoldTable { |
227 | | // Stores memory broadcast folding tables entries sorted by opcode. |
228 | | std::vector<X86FoldTableEntry> Table; |
229 | | |
230 | 1 | X86BroadcastFoldTable() { |
231 | | // Broadcast tables. |
232 | 662 | for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) { |
233 | 662 | unsigned RegOp = Reg2Bcst.KeyOp; |
234 | 662 | unsigned BcstOp = Reg2Bcst.DstOp; |
235 | 662 | if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { |
236 | 662 | unsigned MemOp = Reg2Mem->DstOp; |
237 | 662 | uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | |
238 | 662 | TB_FOLDED_LOAD | TB_FOLDED_BCAST; |
239 | 662 | Table.push_back({MemOp, BcstOp, Flags}); |
240 | 662 | } |
241 | 662 | } |
242 | 48 | for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) { |
243 | 48 | unsigned RegOp = Reg2Bcst.KeyOp; |
244 | 48 | unsigned BcstOp = Reg2Bcst.DstOp; |
245 | 48 | if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { |
246 | 48 | unsigned MemOp = Reg2Mem->DstOp; |
247 | 48 | uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | |
248 | 48 | TB_FOLDED_LOAD | TB_FOLDED_BCAST; |
249 | 48 | Table.push_back({MemOp, BcstOp, Flags}); |
250 | 48 | } |
251 | 48 | } |
252 | | |
253 | 884 | for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) { |
254 | 884 | unsigned RegOp = Reg2Bcst.KeyOp; |
255 | 884 | unsigned BcstOp = Reg2Bcst.DstOp; |
256 | 884 | if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { |
257 | 884 | unsigned MemOp = Reg2Mem->DstOp; |
258 | 884 | uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | |
259 | 884 | TB_FOLDED_LOAD | TB_FOLDED_BCAST; |
260 | 884 | Table.push_back({MemOp, BcstOp, Flags}); |
261 | 884 | } |
262 | 884 | } |
263 | 6 | for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) { |
264 | 6 | unsigned RegOp = Reg2Bcst.KeyOp; |
265 | 6 | unsigned BcstOp = Reg2Bcst.DstOp; |
266 | 6 | if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { |
267 | 6 | unsigned MemOp = Reg2Mem->DstOp; |
268 | 6 | uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | |
269 | 6 | TB_FOLDED_LOAD | TB_FOLDED_BCAST; |
270 | 6 | Table.push_back({MemOp, BcstOp, Flags}); |
271 | 6 | } |
272 | 6 | } |
273 | | |
274 | 781 | for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) { |
275 | 781 | unsigned RegOp = Reg2Bcst.KeyOp; |
276 | 781 | unsigned BcstOp = Reg2Bcst.DstOp; |
277 | 781 | if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) { |
278 | 781 | unsigned MemOp = Reg2Mem->DstOp; |
279 | 781 | uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | |
280 | 781 | TB_FOLDED_LOAD | TB_FOLDED_BCAST; |
281 | 781 | Table.push_back({MemOp, BcstOp, Flags}); |
282 | 781 | } |
283 | 781 | } |
284 | | |
285 | | // Sort the memory->broadcast fold table. |
286 | 1 | array_pod_sort(Table.begin(), Table.end()); |
287 | 1 | } |
288 | | }; |
289 | | } // namespace |
290 | | |
291 | | static bool matchBroadcastSize(const X86FoldTableEntry &Entry, |
292 | 3 | unsigned BroadcastBits) { |
293 | 3 | switch (Entry.Flags & TB_BCAST_MASK) { |
294 | 0 | case TB_BCAST_SD: |
295 | 2 | case TB_BCAST_Q: |
296 | 2 | return BroadcastBits == 64; |
297 | 0 | case TB_BCAST_SS: |
298 | 1 | case TB_BCAST_D: |
299 | 1 | return BroadcastBits == 32; |
300 | 3 | } |
301 | 0 | return false; |
302 | 3 | } |
303 | | |
304 | | const X86FoldTableEntry * |
305 | 48 | llvm::lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits) { |
306 | 48 | static X86BroadcastFoldTable BroadcastFoldTable; |
307 | 48 | auto &Table = BroadcastFoldTable.Table; |
308 | 48 | for (auto I = llvm::lower_bound(Table, MemOp); |
309 | 49 | I != Table.end() && I->KeyOp == MemOp; ++I) { |
310 | 3 | if (matchBroadcastSize(*I, BroadcastBits)) |
311 | 2 | return &*I; |
312 | 3 | } |
313 | 46 | return nullptr; |
314 | 48 | } |