/src/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Line | Count | Source |
1 | | //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file defines the interfaces that RISC-V uses to lower LLVM code into a |
10 | | // selection DAG. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "RISCVISelLowering.h" |
15 | | #include "MCTargetDesc/RISCVMatInt.h" |
16 | | #include "RISCV.h" |
17 | | #include "RISCVMachineFunctionInfo.h" |
18 | | #include "RISCVRegisterInfo.h" |
19 | | #include "RISCVSubtarget.h" |
20 | | #include "RISCVTargetMachine.h" |
21 | | #include "llvm/ADT/SmallSet.h" |
22 | | #include "llvm/ADT/Statistic.h" |
23 | | #include "llvm/Analysis/MemoryLocation.h" |
24 | | #include "llvm/Analysis/VectorUtils.h" |
25 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
26 | | #include "llvm/CodeGen/MachineFunction.h" |
27 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
28 | | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
29 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
30 | | #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" |
31 | | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
32 | | #include "llvm/CodeGen/ValueTypes.h" |
33 | | #include "llvm/IR/DiagnosticInfo.h" |
34 | | #include "llvm/IR/DiagnosticPrinter.h" |
35 | | #include "llvm/IR/IRBuilder.h" |
36 | | #include "llvm/IR/Instructions.h" |
37 | | #include "llvm/IR/IntrinsicsRISCV.h" |
38 | | #include "llvm/IR/PatternMatch.h" |
39 | | #include "llvm/Support/CommandLine.h" |
40 | | #include "llvm/Support/Debug.h" |
41 | | #include "llvm/Support/ErrorHandling.h" |
42 | | #include "llvm/Support/InstructionCost.h" |
43 | | #include "llvm/Support/KnownBits.h" |
44 | | #include "llvm/Support/MathExtras.h" |
45 | | #include "llvm/Support/raw_ostream.h" |
46 | | #include <optional> |
47 | | |
48 | | using namespace llvm; |
49 | | |
50 | | #define DEBUG_TYPE "riscv-lower" |
51 | | |
52 | | STATISTIC(NumTailCalls, "Number of tail calls"); |
53 | | |
54 | | static cl::opt<unsigned> ExtensionMaxWebSize( |
55 | | DEBUG_TYPE "-ext-max-web-size", cl::Hidden, |
56 | | cl::desc("Give the maximum size (in number of nodes) of the web of " |
57 | | "instructions that we will consider for VW expansion"), |
58 | | cl::init(18)); |
59 | | |
60 | | static cl::opt<bool> |
61 | | AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, |
62 | | cl::desc("Allow the formation of VW_W operations (e.g., " |
63 | | "VWADD_W) with splat constants"), |
64 | | cl::init(false)); |
65 | | |
66 | | static cl::opt<unsigned> NumRepeatedDivisors( |
67 | | DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, |
68 | | cl::desc("Set the minimum number of repetitions of a divisor to allow " |
69 | | "transformation to multiplications by the reciprocal"), |
70 | | cl::init(2)); |
71 | | |
72 | | static cl::opt<int> |
73 | | FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, |
74 | | cl::desc("Give the maximum number of instructions that we will " |
75 | | "use for creating a floating-point immediate value"), |
76 | | cl::init(2)); |
77 | | |
78 | | static cl::opt<bool> |
79 | | RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, |
80 | | cl::desc("Make i32 a legal type for SelectionDAG on RV64.")); |
81 | | |
82 | | RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, |
83 | | const RISCVSubtarget &STI) |
84 | 1 | : TargetLowering(TM), Subtarget(STI) { |
85 | | |
86 | 1 | RISCVABI::ABI ABI = Subtarget.getTargetABI(); |
87 | 1 | assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); |
88 | | |
89 | 1 | if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && |
90 | 1 | !Subtarget.hasStdExtF()) { |
91 | 0 | errs() << "Hard-float 'f' ABI can't be used for a target that " |
92 | 0 | "doesn't support the F instruction set extension (ignoring " |
93 | 0 | "target-abi)\n"; |
94 | 0 | ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; |
95 | 1 | } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && |
96 | 1 | !Subtarget.hasStdExtD()) { |
97 | 0 | errs() << "Hard-float 'd' ABI can't be used for a target that " |
98 | 0 | "doesn't support the D instruction set extension (ignoring " |
99 | 0 | "target-abi)\n"; |
100 | 0 | ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; |
101 | 0 | } |
102 | | |
103 | 1 | switch (ABI) { |
104 | 0 | default: |
105 | 0 | report_fatal_error("Don't know how to lower this ABI"); |
106 | 0 | case RISCVABI::ABI_ILP32: |
107 | 0 | case RISCVABI::ABI_ILP32E: |
108 | 0 | case RISCVABI::ABI_LP64E: |
109 | 0 | case RISCVABI::ABI_ILP32F: |
110 | 0 | case RISCVABI::ABI_ILP32D: |
111 | 1 | case RISCVABI::ABI_LP64: |
112 | 1 | case RISCVABI::ABI_LP64F: |
113 | 1 | case RISCVABI::ABI_LP64D: |
114 | 1 | break; |
115 | 1 | } |
116 | | |
117 | 1 | MVT XLenVT = Subtarget.getXLenVT(); |
118 | | |
119 | | // Set up the register classes. |
120 | 1 | addRegisterClass(XLenVT, &RISCV::GPRRegClass); |
121 | 1 | if (Subtarget.is64Bit() && RV64LegalI32) |
122 | 0 | addRegisterClass(MVT::i32, &RISCV::GPRRegClass); |
123 | | |
124 | 1 | if (Subtarget.hasStdExtZfhmin()) |
125 | 0 | addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); |
126 | 1 | if (Subtarget.hasStdExtZfbfmin()) |
127 | 0 | addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass); |
128 | 1 | if (Subtarget.hasStdExtF()) |
129 | 0 | addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); |
130 | 1 | if (Subtarget.hasStdExtD()) |
131 | 0 | addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); |
132 | 1 | if (Subtarget.hasStdExtZhinxmin()) |
133 | 0 | addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass); |
134 | 1 | if (Subtarget.hasStdExtZfinx()) |
135 | 0 | addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass); |
136 | 1 | if (Subtarget.hasStdExtZdinx()) { |
137 | 0 | if (Subtarget.is64Bit()) |
138 | 0 | addRegisterClass(MVT::f64, &RISCV::GPRRegClass); |
139 | 0 | else |
140 | 0 | addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass); |
141 | 0 | } |
142 | | |
143 | 1 | static const MVT::SimpleValueType BoolVecVTs[] = { |
144 | 1 | MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, |
145 | 1 | MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; |
146 | 1 | static const MVT::SimpleValueType IntVecVTs[] = { |
147 | 1 | MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, |
148 | 1 | MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, |
149 | 1 | MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, |
150 | 1 | MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, |
151 | 1 | MVT::nxv4i64, MVT::nxv8i64}; |
152 | 1 | static const MVT::SimpleValueType F16VecVTs[] = { |
153 | 1 | MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, |
154 | 1 | MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; |
155 | 1 | static const MVT::SimpleValueType BF16VecVTs[] = { |
156 | 1 | MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16, |
157 | 1 | MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16}; |
158 | 1 | static const MVT::SimpleValueType F32VecVTs[] = { |
159 | 1 | MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; |
160 | 1 | static const MVT::SimpleValueType F64VecVTs[] = { |
161 | 1 | MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; |
162 | | |
163 | 1 | if (Subtarget.hasVInstructions()) { |
164 | 0 | auto addRegClassForRVV = [this](MVT VT) { |
165 | | // Disable the smallest fractional LMUL types if ELEN is less than |
166 | | // RVVBitsPerBlock. |
167 | 0 | unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen(); |
168 | 0 | if (VT.getVectorMinNumElements() < MinElts) |
169 | 0 | return; |
170 | | |
171 | 0 | unsigned Size = VT.getSizeInBits().getKnownMinValue(); |
172 | 0 | const TargetRegisterClass *RC; |
173 | 0 | if (Size <= RISCV::RVVBitsPerBlock) |
174 | 0 | RC = &RISCV::VRRegClass; |
175 | 0 | else if (Size == 2 * RISCV::RVVBitsPerBlock) |
176 | 0 | RC = &RISCV::VRM2RegClass; |
177 | 0 | else if (Size == 4 * RISCV::RVVBitsPerBlock) |
178 | 0 | RC = &RISCV::VRM4RegClass; |
179 | 0 | else if (Size == 8 * RISCV::RVVBitsPerBlock) |
180 | 0 | RC = &RISCV::VRM8RegClass; |
181 | 0 | else |
182 | 0 | llvm_unreachable("Unexpected size"); |
183 | |
184 | 0 | addRegisterClass(VT, RC); |
185 | 0 | }; |
186 | |
187 | 0 | for (MVT VT : BoolVecVTs) |
188 | 0 | addRegClassForRVV(VT); |
189 | 0 | for (MVT VT : IntVecVTs) { |
190 | 0 | if (VT.getVectorElementType() == MVT::i64 && |
191 | 0 | !Subtarget.hasVInstructionsI64()) |
192 | 0 | continue; |
193 | 0 | addRegClassForRVV(VT); |
194 | 0 | } |
195 | |
196 | 0 | if (Subtarget.hasVInstructionsF16Minimal()) |
197 | 0 | for (MVT VT : F16VecVTs) |
198 | 0 | addRegClassForRVV(VT); |
199 | |
200 | 0 | if (Subtarget.hasVInstructionsBF16()) |
201 | 0 | for (MVT VT : BF16VecVTs) |
202 | 0 | addRegClassForRVV(VT); |
203 | |
204 | 0 | if (Subtarget.hasVInstructionsF32()) |
205 | 0 | for (MVT VT : F32VecVTs) |
206 | 0 | addRegClassForRVV(VT); |
207 | |
208 | 0 | if (Subtarget.hasVInstructionsF64()) |
209 | 0 | for (MVT VT : F64VecVTs) |
210 | 0 | addRegClassForRVV(VT); |
211 | |
212 | 0 | if (Subtarget.useRVVForFixedLengthVectors()) { |
213 | 0 | auto addRegClassForFixedVectors = [this](MVT VT) { |
214 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
215 | 0 | unsigned RCID = getRegClassIDForVecVT(ContainerVT); |
216 | 0 | const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo(); |
217 | 0 | addRegisterClass(VT, TRI.getRegClass(RCID)); |
218 | 0 | }; |
219 | 0 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
220 | 0 | if (useRVVForFixedLengthVectorVT(VT)) |
221 | 0 | addRegClassForFixedVectors(VT); |
222 | |
223 | 0 | for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
224 | 0 | if (useRVVForFixedLengthVectorVT(VT)) |
225 | 0 | addRegClassForFixedVectors(VT); |
226 | 0 | } |
227 | 0 | } |
228 | | |
229 | | // Compute derived properties from the register classes. |
230 | 1 | computeRegisterProperties(STI.getRegisterInfo()); |
231 | | |
232 | 1 | setStackPointerRegisterToSaveRestore(RISCV::X2); |
233 | | |
234 | 1 | setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT, |
235 | 1 | MVT::i1, Promote); |
236 | | // DAGCombiner can call isLoadExtLegal for types that aren't legal. |
237 | 1 | setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, |
238 | 1 | MVT::i1, Promote); |
239 | | |
240 | | // TODO: add all necessary setOperationAction calls. |
241 | 1 | setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); |
242 | | |
243 | 1 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
244 | 1 | setOperationAction(ISD::BR_CC, XLenVT, Expand); |
245 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
246 | 0 | setOperationAction(ISD::BR_CC, MVT::i32, Expand); |
247 | 1 | setOperationAction(ISD::BRCOND, MVT::Other, Custom); |
248 | 1 | setOperationAction(ISD::SELECT_CC, XLenVT, Expand); |
249 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
250 | 0 | setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); |
251 | | |
252 | 1 | setCondCodeAction(ISD::SETLE, XLenVT, Expand); |
253 | 1 | setCondCodeAction(ISD::SETGT, XLenVT, Custom); |
254 | 1 | setCondCodeAction(ISD::SETGE, XLenVT, Expand); |
255 | 1 | setCondCodeAction(ISD::SETULE, XLenVT, Expand); |
256 | 1 | setCondCodeAction(ISD::SETUGT, XLenVT, Custom); |
257 | 1 | setCondCodeAction(ISD::SETUGE, XLenVT, Expand); |
258 | | |
259 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
260 | 0 | setOperationAction(ISD::SETCC, MVT::i32, Promote); |
261 | | |
262 | 1 | setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); |
263 | | |
264 | 1 | setOperationAction(ISD::VASTART, MVT::Other, Custom); |
265 | 1 | setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); |
266 | | |
267 | 1 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
268 | | |
269 | 1 | setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); |
270 | | |
271 | 1 | if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb()) |
272 | 1 | setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); |
273 | | |
274 | 1 | if (Subtarget.is64Bit()) { |
275 | 1 | setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); |
276 | | |
277 | 1 | if (!RV64LegalI32) { |
278 | 1 | setOperationAction(ISD::LOAD, MVT::i32, Custom); |
279 | 1 | setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, |
280 | 1 | MVT::i32, Custom); |
281 | 1 | setOperationAction(ISD::SADDO, MVT::i32, Custom); |
282 | 1 | setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, |
283 | 1 | MVT::i32, Custom); |
284 | 1 | } |
285 | 1 | } else { |
286 | 0 | setLibcallName( |
287 | 0 | {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128}, |
288 | 0 | nullptr); |
289 | 0 | setLibcallName(RTLIB::MULO_I64, nullptr); |
290 | 0 | } |
291 | | |
292 | 1 | if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) { |
293 | 1 | setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand); |
294 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
295 | 0 | setOperationAction(ISD::MUL, MVT::i32, Promote); |
296 | 1 | } else if (Subtarget.is64Bit()) { |
297 | 0 | setOperationAction(ISD::MUL, MVT::i128, Custom); |
298 | 0 | if (!RV64LegalI32) |
299 | 0 | setOperationAction(ISD::MUL, MVT::i32, Custom); |
300 | 0 | } else { |
301 | 0 | setOperationAction(ISD::MUL, MVT::i64, Custom); |
302 | 0 | } |
303 | | |
304 | 1 | if (!Subtarget.hasStdExtM()) { |
305 | 1 | setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, |
306 | 1 | XLenVT, Expand); |
307 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
308 | 0 | setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32, |
309 | 0 | Promote); |
310 | 1 | } else if (Subtarget.is64Bit()) { |
311 | 0 | if (!RV64LegalI32) |
312 | 0 | setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, |
313 | 0 | {MVT::i8, MVT::i16, MVT::i32}, Custom); |
314 | 0 | } |
315 | | |
316 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) { |
317 | 0 | setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand); |
318 | 0 | setOperationAction( |
319 | 0 | {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, |
320 | 0 | Expand); |
321 | 0 | } |
322 | | |
323 | 1 | setOperationAction( |
324 | 1 | {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, |
325 | 1 | Expand); |
326 | | |
327 | 1 | setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT, |
328 | 1 | Custom); |
329 | | |
330 | 1 | if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) { |
331 | 0 | if (!RV64LegalI32 && Subtarget.is64Bit()) |
332 | 0 | setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); |
333 | 1 | } else if (Subtarget.hasVendorXTHeadBb()) { |
334 | 0 | if (Subtarget.is64Bit()) |
335 | 0 | setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); |
336 | 0 | setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom); |
337 | 1 | } else if (Subtarget.hasVendorXCVbitmanip()) { |
338 | 0 | setOperationAction(ISD::ROTL, XLenVT, Expand); |
339 | 1 | } else { |
340 | 1 | setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand); |
341 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
342 | 0 | setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand); |
343 | 1 | } |
344 | | |
345 | | // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll |
346 | | // pattern match it directly in isel. |
347 | 1 | setOperationAction(ISD::BSWAP, XLenVT, |
348 | 1 | (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || |
349 | 1 | Subtarget.hasVendorXTHeadBb()) |
350 | 1 | ? Legal |
351 | 1 | : Expand); |
352 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
353 | 0 | setOperationAction(ISD::BSWAP, MVT::i32, |
354 | 0 | (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || |
355 | 0 | Subtarget.hasVendorXTHeadBb()) |
356 | 0 | ? Promote |
357 | 0 | : Expand); |
358 | | |
359 | | |
360 | 1 | if (Subtarget.hasVendorXCVbitmanip()) { |
361 | 0 | setOperationAction(ISD::BITREVERSE, XLenVT, Legal); |
362 | 1 | } else { |
363 | | // Zbkb can use rev8+brev8 to implement bitreverse. |
364 | 1 | setOperationAction(ISD::BITREVERSE, XLenVT, |
365 | 1 | Subtarget.hasStdExtZbkb() ? Custom : Expand); |
366 | 1 | } |
367 | | |
368 | 1 | if (Subtarget.hasStdExtZbb()) { |
369 | 0 | setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, |
370 | 0 | Legal); |
371 | 0 | if (RV64LegalI32 && Subtarget.is64Bit()) |
372 | 0 | setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32, |
373 | 0 | Promote); |
374 | |
375 | 0 | if (Subtarget.is64Bit()) { |
376 | 0 | if (RV64LegalI32) |
377 | 0 | setOperationAction(ISD::CTTZ, MVT::i32, Legal); |
378 | 0 | else |
379 | 0 | setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); |
380 | 0 | } |
381 | 1 | } else if (!Subtarget.hasVendorXCVbitmanip()) { |
382 | 1 | setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand); |
383 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
384 | 0 | setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand); |
385 | 1 | } |
386 | | |
387 | 1 | if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || |
388 | 1 | Subtarget.hasVendorXCVbitmanip()) { |
389 | | // We need the custom lowering to make sure that the resulting sequence |
390 | | // for the 32bit case is efficient on 64bit targets. |
391 | 0 | if (Subtarget.is64Bit()) { |
392 | 0 | if (RV64LegalI32) { |
393 | 0 | setOperationAction(ISD::CTLZ, MVT::i32, |
394 | 0 | Subtarget.hasStdExtZbb() ? Legal : Promote); |
395 | 0 | if (!Subtarget.hasStdExtZbb()) |
396 | 0 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); |
397 | 0 | } else |
398 | 0 | setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); |
399 | 0 | } |
400 | 1 | } else { |
401 | 1 | setOperationAction(ISD::CTLZ, XLenVT, Expand); |
402 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
403 | 0 | setOperationAction(ISD::CTLZ, MVT::i32, Expand); |
404 | 1 | } |
405 | | |
406 | 1 | if (!RV64LegalI32 && Subtarget.is64Bit() && |
407 | 1 | !Subtarget.hasShortForwardBranchOpt()) |
408 | 1 | setOperationAction(ISD::ABS, MVT::i32, Custom); |
409 | | |
410 | | // We can use PseudoCCSUB to implement ABS. |
411 | 1 | if (Subtarget.hasShortForwardBranchOpt()) |
412 | 0 | setOperationAction(ISD::ABS, XLenVT, Legal); |
413 | | |
414 | 1 | if (!Subtarget.hasVendorXTHeadCondMov()) |
415 | 1 | setOperationAction(ISD::SELECT, XLenVT, Custom); |
416 | | |
417 | 1 | if (RV64LegalI32 && Subtarget.is64Bit()) |
418 | 0 | setOperationAction(ISD::SELECT, MVT::i32, Promote); |
419 | | |
420 | 1 | static const unsigned FPLegalNodeTypes[] = { |
421 | 1 | ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, |
422 | 1 | ISD::LLRINT, ISD::LROUND, ISD::LLROUND, |
423 | 1 | ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, |
424 | 1 | ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD, |
425 | 1 | ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, |
426 | 1 | ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS}; |
427 | | |
428 | 1 | static const ISD::CondCode FPCCToExpand[] = { |
429 | 1 | ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, |
430 | 1 | ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, |
431 | 1 | ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; |
432 | | |
433 | 1 | static const unsigned FPOpToExpand[] = { |
434 | 1 | ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, |
435 | 1 | ISD::FREM}; |
436 | | |
437 | 1 | static const unsigned FPRndMode[] = { |
438 | 1 | ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND, |
439 | 1 | ISD::FROUNDEVEN}; |
440 | | |
441 | 1 | if (Subtarget.hasStdExtZfhminOrZhinxmin()) |
442 | 0 | setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
443 | | |
444 | 1 | static const unsigned ZfhminZfbfminPromoteOps[] = { |
445 | 1 | ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, |
446 | 1 | ISD::FSUB, ISD::FMUL, ISD::FMA, |
447 | 1 | ISD::FDIV, ISD::FSQRT, ISD::FABS, |
448 | 1 | ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, |
449 | 1 | ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, |
450 | 1 | ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, |
451 | 1 | ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, |
452 | 1 | ISD::FTRUNC, ISD::FRINT, ISD::FROUND, |
453 | 1 | ISD::FROUNDEVEN, ISD::SELECT}; |
454 | | |
455 | 1 | if (Subtarget.hasStdExtZfbfmin()) { |
456 | 0 | setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
457 | 0 | setOperationAction(ISD::BITCAST, MVT::bf16, Custom); |
458 | 0 | setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom); |
459 | 0 | setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); |
460 | 0 | setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); |
461 | 0 | setOperationAction(ISD::ConstantFP, MVT::bf16, Expand); |
462 | 0 | setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand); |
463 | 0 | setOperationAction(ISD::BR_CC, MVT::bf16, Expand); |
464 | 0 | setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote); |
465 | 0 | setOperationAction(ISD::FREM, MVT::bf16, Promote); |
466 | | // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the |
467 | | // DAGCombiner::visitFP_ROUND probably needs improvements first. |
468 | 0 | setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); |
469 | 0 | } |
470 | | |
471 | 1 | if (Subtarget.hasStdExtZfhminOrZhinxmin()) { |
472 | 0 | if (Subtarget.hasStdExtZfhOrZhinx()) { |
473 | 0 | setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); |
474 | 0 | setOperationAction(FPRndMode, MVT::f16, |
475 | 0 | Subtarget.hasStdExtZfa() ? Legal : Custom); |
476 | 0 | setOperationAction(ISD::SELECT, MVT::f16, Custom); |
477 | 0 | setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom); |
478 | 0 | } else { |
479 | 0 | setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote); |
480 | 0 | setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT, |
481 | 0 | ISD::STRICT_LROUND, ISD::STRICT_LLROUND}, |
482 | 0 | MVT::f16, Legal); |
483 | | // FIXME: Need to promote f16 FCOPYSIGN to f32, but the |
484 | | // DAGCombiner::visitFP_ROUND probably needs improvements first. |
485 | 0 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); |
486 | 0 | } |
487 | |
488 | 0 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); |
489 | 0 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); |
490 | 0 | setCondCodeAction(FPCCToExpand, MVT::f16, Expand); |
491 | 0 | setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); |
492 | 0 | setOperationAction(ISD::BR_CC, MVT::f16, Expand); |
493 | |
494 | 0 | setOperationAction(ISD::FNEARBYINT, MVT::f16, |
495 | 0 | Subtarget.hasStdExtZfa() ? Legal : Promote); |
496 | 0 | setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI, |
497 | 0 | ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, |
498 | 0 | ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, |
499 | 0 | ISD::FLOG10}, |
500 | 0 | MVT::f16, Promote); |
501 | | |
502 | | // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have |
503 | | // complete support for all operations in LegalizeDAG. |
504 | 0 | setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, |
505 | 0 | ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, |
506 | 0 | ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, |
507 | 0 | ISD::STRICT_FTRUNC}, |
508 | 0 | MVT::f16, Promote); |
509 | | |
510 | | // We need to custom promote this. |
511 | 0 | if (Subtarget.is64Bit()) |
512 | 0 | setOperationAction(ISD::FPOWI, MVT::i32, Custom); |
513 | |
514 | 0 | if (!Subtarget.hasStdExtZfa()) |
515 | 0 | setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom); |
516 | 0 | } |
517 | | |
518 | 1 | if (Subtarget.hasStdExtFOrZfinx()) { |
519 | 0 | setOperationAction(FPLegalNodeTypes, MVT::f32, Legal); |
520 | 0 | setOperationAction(FPRndMode, MVT::f32, |
521 | 0 | Subtarget.hasStdExtZfa() ? Legal : Custom); |
522 | 0 | setCondCodeAction(FPCCToExpand, MVT::f32, Expand); |
523 | 0 | setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); |
524 | 0 | setOperationAction(ISD::SELECT, MVT::f32, Custom); |
525 | 0 | setOperationAction(ISD::BR_CC, MVT::f32, Expand); |
526 | 0 | setOperationAction(FPOpToExpand, MVT::f32, Expand); |
527 | 0 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
528 | 0 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
529 | 0 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); |
530 | 0 | setTruncStoreAction(MVT::f32, MVT::bf16, Expand); |
531 | 0 | setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom); |
532 | 0 | setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom); |
533 | 0 | setOperationAction(ISD::FP_TO_BF16, MVT::f32, |
534 | 0 | Subtarget.isSoftFPABI() ? LibCall : Custom); |
535 | 0 | setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); |
536 | 0 | setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom); |
537 | |
538 | 0 | if (Subtarget.hasStdExtZfa()) |
539 | 0 | setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); |
540 | 0 | else |
541 | 0 | setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom); |
542 | 0 | } |
543 | | |
544 | 1 | if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit()) |
545 | 0 | setOperationAction(ISD::BITCAST, MVT::i32, Custom); |
546 | | |
547 | 1 | if (Subtarget.hasStdExtDOrZdinx()) { |
548 | 0 | setOperationAction(FPLegalNodeTypes, MVT::f64, Legal); |
549 | |
550 | 0 | if (Subtarget.hasStdExtZfa()) { |
551 | 0 | setOperationAction(FPRndMode, MVT::f64, Legal); |
552 | 0 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); |
553 | 0 | setOperationAction(ISD::BITCAST, MVT::i64, Custom); |
554 | 0 | setOperationAction(ISD::BITCAST, MVT::f64, Custom); |
555 | 0 | } else { |
556 | 0 | if (Subtarget.is64Bit()) |
557 | 0 | setOperationAction(FPRndMode, MVT::f64, Custom); |
558 | |
559 | 0 | setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom); |
560 | 0 | } |
561 | |
562 | 0 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); |
563 | 0 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); |
564 | 0 | setCondCodeAction(FPCCToExpand, MVT::f64, Expand); |
565 | 0 | setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); |
566 | 0 | setOperationAction(ISD::SELECT, MVT::f64, Custom); |
567 | 0 | setOperationAction(ISD::BR_CC, MVT::f64, Expand); |
568 | 0 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
569 | 0 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
570 | 0 | setOperationAction(FPOpToExpand, MVT::f64, Expand); |
571 | 0 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
572 | 0 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
573 | 0 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); |
574 | 0 | setTruncStoreAction(MVT::f64, MVT::bf16, Expand); |
575 | 0 | setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom); |
576 | 0 | setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom); |
577 | 0 | setOperationAction(ISD::FP_TO_BF16, MVT::f64, |
578 | 0 | Subtarget.isSoftFPABI() ? LibCall : Custom); |
579 | 0 | setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); |
580 | 0 | setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); |
581 | 0 | } |
582 | | |
583 | 1 | if (Subtarget.is64Bit()) { |
584 | 1 | setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT, |
585 | 1 | ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT}, |
586 | 1 | MVT::i32, Custom); |
587 | 1 | setOperationAction(ISD::LROUND, MVT::i32, Custom); |
588 | 1 | } |
589 | | |
590 | 1 | if (Subtarget.hasStdExtFOrZfinx()) { |
591 | 0 | setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT, |
592 | 0 | Custom); |
593 | |
594 | 0 | setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, |
595 | 0 | ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, |
596 | 0 | XLenVT, Legal); |
597 | |
598 | 0 | if (RV64LegalI32 && Subtarget.is64Bit()) |
599 | 0 | setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, |
600 | 0 | ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, |
601 | 0 | MVT::i32, Legal); |
602 | |
603 | 0 | setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom); |
604 | 0 | setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); |
605 | 0 | } |
606 | | |
607 | 1 | setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, |
608 | 1 | ISD::JumpTable}, |
609 | 1 | XLenVT, Custom); |
610 | | |
611 | 1 | setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); |
612 | | |
613 | 1 | if (Subtarget.is64Bit()) |
614 | 1 | setOperationAction(ISD::Constant, MVT::i64, Custom); |
615 | | |
616 | | // TODO: On M-mode only targets, the cycle[h] CSR may not be present. |
617 | | // Unfortunately this can't be determined just from the ISA naming string. |
618 | 1 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, |
619 | 1 | Subtarget.is64Bit() ? Legal : Custom); |
620 | | |
621 | 1 | setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal); |
622 | 1 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
623 | 1 | if (Subtarget.is64Bit()) |
624 | 1 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); |
625 | | |
626 | 1 | if (Subtarget.hasStdExtZicbop()) { |
627 | 0 | setOperationAction(ISD::PREFETCH, MVT::Other, Legal); |
628 | 0 | } |
629 | | |
630 | 1 | if (Subtarget.hasStdExtA()) { |
631 | 0 | setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); |
632 | 0 | setMinCmpXchgSizeInBits(32); |
633 | 1 | } else if (Subtarget.hasForcedAtomics()) { |
634 | 0 | setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); |
635 | 1 | } else { |
636 | 1 | setMaxAtomicSizeInBitsSupported(0); |
637 | 1 | } |
638 | | |
639 | 1 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); |
640 | | |
641 | 1 | setBooleanContents(ZeroOrOneBooleanContent); |
642 | | |
643 | 1 | if (Subtarget.hasVInstructions()) { |
644 | 0 | setBooleanVectorContents(ZeroOrOneBooleanContent); |
645 | |
646 | 0 | setOperationAction(ISD::VSCALE, XLenVT, Custom); |
647 | 0 | if (RV64LegalI32 && Subtarget.is64Bit()) |
648 | 0 | setOperationAction(ISD::VSCALE, MVT::i32, Custom); |
649 | | |
650 | | // RVV intrinsics may have illegal operands. |
651 | | // We also need to custom legalize vmv.x.s. |
652 | 0 | setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN, |
653 | 0 | ISD::INTRINSIC_VOID}, |
654 | 0 | {MVT::i8, MVT::i16}, Custom); |
655 | 0 | if (Subtarget.is64Bit()) |
656 | 0 | setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, |
657 | 0 | MVT::i32, Custom); |
658 | 0 | else |
659 | 0 | setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, |
660 | 0 | MVT::i64, Custom); |
661 | |
662 | 0 | setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, |
663 | 0 | MVT::Other, Custom); |
664 | |
665 | 0 | static const unsigned IntegerVPOps[] = { |
666 | 0 | ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, |
667 | 0 | ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM, |
668 | 0 | ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, |
669 | 0 | ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR, |
670 | 0 | ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, |
671 | 0 | ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, |
672 | 0 | ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, |
673 | 0 | ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT, |
674 | 0 | ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, |
675 | 0 | ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN, |
676 | 0 | ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX, |
677 | 0 | ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE}; |
678 | |
679 | 0 | static const unsigned FloatingPointVPOps[] = { |
680 | 0 | ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, |
681 | 0 | ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, |
682 | 0 | ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, |
683 | 0 | ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, |
684 | 0 | ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP, |
685 | 0 | ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND, |
686 | 0 | ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, |
687 | 0 | ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, |
688 | 0 | ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, |
689 | 0 | ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, |
690 | 0 | ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE}; |
691 | |
692 | 0 | static const unsigned IntegerVecReduceOps[] = { |
693 | 0 | ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, |
694 | 0 | ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, |
695 | 0 | ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}; |
696 | |
697 | 0 | static const unsigned FloatingPointVecReduceOps[] = { |
698 | 0 | ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN, |
699 | 0 | ISD::VECREDUCE_FMAX}; |
700 | |
701 | 0 | if (!Subtarget.is64Bit()) { |
702 | | // We must custom-lower certain vXi64 operations on RV32 due to the vector |
703 | | // element type being illegal. |
704 | 0 | setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, |
705 | 0 | MVT::i64, Custom); |
706 | |
707 | 0 | setOperationAction(IntegerVecReduceOps, MVT::i64, Custom); |
708 | |
709 | 0 | setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, |
710 | 0 | ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, |
711 | 0 | ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, |
712 | 0 | ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}, |
713 | 0 | MVT::i64, Custom); |
714 | 0 | } |
715 | |
716 | 0 | for (MVT VT : BoolVecVTs) { |
717 | 0 | if (!isTypeLegal(VT)) |
718 | 0 | continue; |
719 | | |
720 | 0 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
721 | | |
722 | | // Mask VTs are custom-expanded into a series of standard nodes |
723 | 0 | setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS, |
724 | 0 | ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, |
725 | 0 | ISD::SCALAR_TO_VECTOR}, |
726 | 0 | VT, Custom); |
727 | |
728 | 0 | setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, |
729 | 0 | Custom); |
730 | |
731 | 0 | setOperationAction(ISD::SELECT, VT, Custom); |
732 | 0 | setOperationAction( |
733 | 0 | {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT, |
734 | 0 | Expand); |
735 | |
736 | 0 | setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom); |
737 | |
738 | 0 | setOperationAction( |
739 | 0 | {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, |
740 | 0 | Custom); |
741 | |
742 | 0 | setOperationAction( |
743 | 0 | {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, |
744 | 0 | Custom); |
745 | | |
746 | | // RVV has native int->float & float->int conversions where the |
747 | | // element type sizes are within one power-of-two of each other. Any |
748 | | // wider distances between type sizes have to be lowered as sequences |
749 | | // which progressively narrow the gap in stages. |
750 | 0 | setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, |
751 | 0 | ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP, |
752 | 0 | ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT, |
753 | 0 | ISD::STRICT_FP_TO_UINT}, |
754 | 0 | VT, Custom); |
755 | 0 | setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, |
756 | 0 | Custom); |
757 | | |
758 | | // Expand all extending loads to types larger than this, and truncating |
759 | | // stores from types larger than this. |
760 | 0 | for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { |
761 | 0 | setTruncStoreAction(VT, OtherVT, Expand); |
762 | 0 | setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, |
763 | 0 | OtherVT, Expand); |
764 | 0 | } |
765 | |
766 | 0 | setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, |
767 | 0 | ISD::VP_TRUNCATE, ISD::VP_SETCC}, |
768 | 0 | VT, Custom); |
769 | |
770 | 0 | setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); |
771 | 0 | setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); |
772 | |
773 | 0 | setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); |
774 | |
775 | 0 | setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); |
776 | 0 | setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); |
777 | |
778 | 0 | setOperationPromotedToType( |
779 | 0 | ISD::VECTOR_SPLICE, VT, |
780 | 0 | MVT::getVectorVT(MVT::i8, VT.getVectorElementCount())); |
781 | 0 | } |
782 | |
783 | 0 | for (MVT VT : IntVecVTs) { |
784 | 0 | if (!isTypeLegal(VT)) |
785 | 0 | continue; |
786 | | |
787 | 0 | setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); |
788 | 0 | setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); |
789 | | |
790 | | // Vectors implement MULHS/MULHU. |
791 | 0 | setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); |
792 | | |
793 | | // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*. |
794 | 0 | if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) |
795 | 0 | setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand); |
796 | |
797 | 0 | setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT, |
798 | 0 | Legal); |
799 | | |
800 | | // Custom-lower extensions and truncations from/to mask types. |
801 | 0 | setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, |
802 | 0 | VT, Custom); |
803 | | |
804 | | // RVV has native int->float & float->int conversions where the |
805 | | // element type sizes are within one power-of-two of each other. Any |
806 | | // wider distances between type sizes have to be lowered as sequences |
807 | | // which progressively narrow the gap in stages. |
808 | 0 | setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, |
809 | 0 | ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP, |
810 | 0 | ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT, |
811 | 0 | ISD::STRICT_FP_TO_UINT}, |
812 | 0 | VT, Custom); |
813 | 0 | setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, |
814 | 0 | Custom); |
815 | 0 | setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); |
816 | 0 | setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, |
817 | 0 | ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, |
818 | 0 | VT, Legal); |
819 | | |
820 | | // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" |
821 | | // nodes which truncate by one power of two at a time. |
822 | 0 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
823 | | |
824 | | // Custom-lower insert/extract operations to simplify patterns. |
825 | 0 | setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, |
826 | 0 | Custom); |
827 | | |
828 | | // Custom-lower reduction operations to set up the corresponding custom |
829 | | // nodes' operands. |
830 | 0 | setOperationAction(IntegerVecReduceOps, VT, Custom); |
831 | |
832 | 0 | setOperationAction(IntegerVPOps, VT, Custom); |
833 | |
834 | 0 | setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); |
835 | |
836 | 0 | setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, |
837 | 0 | VT, Custom); |
838 | |
839 | 0 | setOperationAction( |
840 | 0 | {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, |
841 | 0 | ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, |
842 | 0 | VT, Custom); |
843 | |
844 | 0 | setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, |
845 | 0 | ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, |
846 | 0 | VT, Custom); |
847 | |
848 | 0 | setOperationAction(ISD::SELECT, VT, Custom); |
849 | 0 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
850 | |
851 | 0 | setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom); |
852 | |
853 | 0 | for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { |
854 | 0 | setTruncStoreAction(VT, OtherVT, Expand); |
855 | 0 | setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, |
856 | 0 | OtherVT, Expand); |
857 | 0 | } |
858 | |
859 | 0 | setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); |
860 | 0 | setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); |
861 | | |
862 | | // Splice |
863 | 0 | setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); |
864 | |
865 | 0 | if (Subtarget.hasStdExtZvkb()) { |
866 | 0 | setOperationAction(ISD::BSWAP, VT, Legal); |
867 | 0 | setOperationAction(ISD::VP_BSWAP, VT, Custom); |
868 | 0 | } else { |
869 | 0 | setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand); |
870 | 0 | setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand); |
871 | 0 | } |
872 | |
873 | 0 | if (Subtarget.hasStdExtZvbb()) { |
874 | 0 | setOperationAction(ISD::BITREVERSE, VT, Legal); |
875 | 0 | setOperationAction(ISD::VP_BITREVERSE, VT, Custom); |
876 | 0 | setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, |
877 | 0 | ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, |
878 | 0 | VT, Custom); |
879 | 0 | } else { |
880 | 0 | setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand); |
881 | 0 | setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand); |
882 | 0 | setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, |
883 | 0 | ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, |
884 | 0 | VT, Expand); |
885 | | |
886 | | // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the |
887 | | // range of f32. |
888 | 0 | EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
889 | 0 | if (isTypeLegal(FloatVT)) { |
890 | 0 | setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, |
891 | 0 | ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ, |
892 | 0 | ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF}, |
893 | 0 | VT, Custom); |
894 | 0 | } |
895 | 0 | } |
896 | 0 | } |
897 | | |
898 | | // Expand various CCs to best match the RVV ISA, which natively supports UNE |
899 | | // but no other unordered comparisons, and supports all ordered comparisons |
900 | | // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization |
901 | | // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), |
902 | | // and we pattern-match those back to the "original", swapping operands once |
903 | | // more. This way we catch both operations and both "vf" and "fv" forms with |
904 | | // fewer patterns. |
905 | 0 | static const ISD::CondCode VFPCCToExpand[] = { |
906 | 0 | ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, |
907 | 0 | ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, |
908 | 0 | ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, |
909 | 0 | }; |
910 | | |
911 | | // TODO: support more ops. |
912 | 0 | static const unsigned ZvfhminPromoteOps[] = { |
913 | 0 | ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, |
914 | 0 | ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, |
915 | 0 | ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL, |
916 | 0 | ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, |
917 | 0 | ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM, |
918 | 0 | ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, |
919 | 0 | ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}; |
920 | | |
921 | | // TODO: support more vp ops. |
922 | 0 | static const unsigned ZvfhminPromoteVPOps[] = { |
923 | 0 | ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, |
924 | 0 | ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, |
925 | 0 | ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, |
926 | 0 | ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT, |
927 | 0 | ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, |
928 | 0 | ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, |
929 | 0 | ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, |
930 | 0 | ISD::VP_FNEARBYINT, ISD::VP_SETCC}; |
931 | | |
932 | | // Sets common operation actions on RVV floating-point vector types. |
933 | 0 | const auto SetCommonVFPActions = [&](MVT VT) { |
934 | 0 | setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); |
935 | | // RVV has native FP_ROUND & FP_EXTEND conversions where the element type |
936 | | // sizes are within one power-of-two of each other. Therefore conversions |
937 | | // between vXf16 and vXf64 must be lowered as sequences which convert via |
938 | | // vXf32. |
939 | 0 | setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); |
940 | | // Custom-lower insert/extract operations to simplify patterns. |
941 | 0 | setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, |
942 | 0 | Custom); |
943 | | // Expand various condition codes (explained above). |
944 | 0 | setCondCodeAction(VFPCCToExpand, VT, Expand); |
945 | |
946 | 0 | setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); |
947 | 0 | setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); |
948 | |
949 | 0 | setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, |
950 | 0 | ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, |
951 | 0 | ISD::IS_FPCLASS}, |
952 | 0 | VT, Custom); |
953 | |
|
954 | 0 | setOperationAction(FloatingPointVecReduceOps, VT, Custom); |
955 | | |
956 | | // Expand FP operations that need libcalls. |
957 | 0 | setOperationAction(ISD::FREM, VT, Expand); |
958 | 0 | setOperationAction(ISD::FPOW, VT, Expand); |
959 | 0 | setOperationAction(ISD::FCOS, VT, Expand); |
960 | 0 | setOperationAction(ISD::FSIN, VT, Expand); |
961 | 0 | setOperationAction(ISD::FSINCOS, VT, Expand); |
962 | 0 | setOperationAction(ISD::FEXP, VT, Expand); |
963 | 0 | setOperationAction(ISD::FEXP2, VT, Expand); |
964 | 0 | setOperationAction(ISD::FEXP10, VT, Expand); |
965 | 0 | setOperationAction(ISD::FLOG, VT, Expand); |
966 | 0 | setOperationAction(ISD::FLOG2, VT, Expand); |
967 | 0 | setOperationAction(ISD::FLOG10, VT, Expand); |
968 | |
969 | 0 | setOperationAction(ISD::FCOPYSIGN, VT, Legal); |
970 | |
971 | 0 | setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); |
972 | |
973 | 0 | setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, |
974 | 0 | VT, Custom); |
975 | |
976 | 0 | setOperationAction( |
977 | 0 | {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, |
978 | 0 | ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, |
979 | 0 | VT, Custom); |
980 | |
981 | 0 | setOperationAction(ISD::SELECT, VT, Custom); |
982 | 0 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
983 | |
984 | 0 | setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, |
985 | 0 | ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, |
986 | 0 | VT, Custom); |
987 | |
988 | 0 | setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); |
989 | 0 | setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); |
990 | |
991 | 0 | setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom); |
992 | |
993 | 0 | setOperationAction(FloatingPointVPOps, VT, Custom); |
994 | |
995 | 0 | setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT, |
996 | 0 | Custom); |
997 | 0 | setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, |
998 | 0 | ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}, |
999 | 0 | VT, Legal); |
1000 | 0 | setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, |
1001 | 0 | ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL, |
1002 | 0 | ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, |
1003 | 0 | ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, |
1004 | 0 | VT, Custom); |
1005 | 0 | }; |
1006 | | |
1007 | | // Sets common extload/truncstore actions on RVV floating-point vector |
1008 | | // types. |
1009 | 0 | const auto SetCommonVFPExtLoadTruncStoreActions = |
1010 | 0 | [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) { |
1011 | 0 | for (auto SmallVT : SmallerVTs) { |
1012 | 0 | setTruncStoreAction(VT, SmallVT, Expand); |
1013 | 0 | setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand); |
1014 | 0 | } |
1015 | 0 | }; |
1016 | |
1017 | 0 | if (Subtarget.hasVInstructionsF16()) { |
1018 | 0 | for (MVT VT : F16VecVTs) { |
1019 | 0 | if (!isTypeLegal(VT)) |
1020 | 0 | continue; |
1021 | 0 | SetCommonVFPActions(VT); |
1022 | 0 | } |
1023 | 0 | } else if (Subtarget.hasVInstructionsF16Minimal()) { |
1024 | 0 | for (MVT VT : F16VecVTs) { |
1025 | 0 | if (!isTypeLegal(VT)) |
1026 | 0 | continue; |
1027 | 0 | setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); |
1028 | 0 | setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, |
1029 | 0 | Custom); |
1030 | 0 | setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); |
1031 | 0 | setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, |
1032 | 0 | Custom); |
1033 | 0 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1034 | 0 | setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, |
1035 | 0 | ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, |
1036 | 0 | VT, Custom); |
1037 | 0 | setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, |
1038 | 0 | ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, |
1039 | 0 | VT, Custom); |
1040 | 0 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1041 | | // load/store |
1042 | 0 | setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); |
1043 | | |
1044 | | // Custom split nxv32f16 since nxv32f32 is not legal. |
1045 | 0 | if (VT == MVT::nxv32f16) { |
1046 | 0 | setOperationAction(ZvfhminPromoteOps, VT, Custom); |
1047 | 0 | setOperationAction(ZvfhminPromoteVPOps, VT, Custom); |
1048 | 0 | continue; |
1049 | 0 | } |
1050 | | // Add more promote ops. |
1051 | 0 | MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
1052 | 0 | setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); |
1053 | 0 | setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); |
1054 | 0 | } |
1055 | 0 | } |
1056 | |
1057 | 0 | if (Subtarget.hasVInstructionsF32()) { |
1058 | 0 | for (MVT VT : F32VecVTs) { |
1059 | 0 | if (!isTypeLegal(VT)) |
1060 | 0 | continue; |
1061 | 0 | SetCommonVFPActions(VT); |
1062 | 0 | SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); |
1063 | 0 | } |
1064 | 0 | } |
1065 | |
1066 | 0 | if (Subtarget.hasVInstructionsF64()) { |
1067 | 0 | for (MVT VT : F64VecVTs) { |
1068 | 0 | if (!isTypeLegal(VT)) |
1069 | 0 | continue; |
1070 | 0 | SetCommonVFPActions(VT); |
1071 | 0 | SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); |
1072 | 0 | SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); |
1073 | 0 | } |
1074 | 0 | } |
1075 | |
1076 | 0 | if (Subtarget.useRVVForFixedLengthVectors()) { |
1077 | 0 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { |
1078 | 0 | if (!useRVVForFixedLengthVectorVT(VT)) |
1079 | 0 | continue; |
1080 | | |
1081 | | // By default everything must be expanded. |
1082 | 0 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
1083 | 0 | setOperationAction(Op, VT, Expand); |
1084 | 0 | for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { |
1085 | 0 | setTruncStoreAction(VT, OtherVT, Expand); |
1086 | 0 | setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT, |
1087 | 0 | OtherVT, Expand); |
1088 | 0 | } |
1089 | | |
1090 | | // Custom lower fixed vector undefs to scalable vector undefs to avoid |
1091 | | // expansion to a build_vector of 0s. |
1092 | 0 | setOperationAction(ISD::UNDEF, VT, Custom); |
1093 | | |
1094 | | // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. |
1095 | 0 | setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, |
1096 | 0 | Custom); |
1097 | |
1098 | 0 | setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT, |
1099 | 0 | Custom); |
1100 | |
1101 | 0 | setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, |
1102 | 0 | VT, Custom); |
1103 | |
1104 | 0 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
1105 | |
1106 | 0 | setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); |
1107 | |
1108 | 0 | setOperationAction(ISD::SETCC, VT, Custom); |
1109 | |
1110 | 0 | setOperationAction(ISD::SELECT, VT, Custom); |
1111 | |
1112 | 0 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1113 | |
1114 | 0 | setOperationAction(ISD::BITCAST, VT, Custom); |
1115 | |
1116 | 0 | setOperationAction( |
1117 | 0 | {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, |
1118 | 0 | Custom); |
1119 | |
1120 | 0 | setOperationAction( |
1121 | 0 | {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, |
1122 | 0 | Custom); |
1123 | |
1124 | 0 | setOperationAction( |
1125 | 0 | { |
1126 | 0 | ISD::SINT_TO_FP, |
1127 | 0 | ISD::UINT_TO_FP, |
1128 | 0 | ISD::FP_TO_SINT, |
1129 | 0 | ISD::FP_TO_UINT, |
1130 | 0 | ISD::STRICT_SINT_TO_FP, |
1131 | 0 | ISD::STRICT_UINT_TO_FP, |
1132 | 0 | ISD::STRICT_FP_TO_SINT, |
1133 | 0 | ISD::STRICT_FP_TO_UINT, |
1134 | 0 | }, |
1135 | 0 | VT, Custom); |
1136 | 0 | setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, |
1137 | 0 | Custom); |
1138 | |
1139 | 0 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1140 | | |
1141 | | // Operations below are different between masks and other vectors. |
1142 | 0 | if (VT.getVectorElementType() == MVT::i1) { |
1143 | 0 | setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND, |
1144 | 0 | ISD::OR, ISD::XOR}, |
1145 | 0 | VT, Custom); |
1146 | |
1147 | 0 | setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, |
1148 | 0 | ISD::VP_SETCC, ISD::VP_TRUNCATE}, |
1149 | 0 | VT, Custom); |
1150 | |
1151 | 0 | setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom); |
1152 | 0 | setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom); |
1153 | 0 | continue; |
1154 | 0 | } |
1155 | | |
1156 | | // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to |
1157 | | // it before type legalization for i64 vectors on RV32. It will then be |
1158 | | // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle. |
1159 | | // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs |
1160 | | // improvements first. |
1161 | 0 | if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) { |
1162 | 0 | setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); |
1163 | 0 | setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); |
1164 | 0 | } |
1165 | |
1166 | 0 | setOperationAction( |
1167 | 0 | {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom); |
1168 | |
1169 | 0 | setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, |
1170 | 0 | ISD::EXPERIMENTAL_VP_STRIDED_LOAD, |
1171 | 0 | ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, |
1172 | 0 | ISD::VP_SCATTER}, |
1173 | 0 | VT, Custom); |
1174 | |
1175 | 0 | setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR, |
1176 | 0 | ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV, |
1177 | 0 | ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL}, |
1178 | 0 | VT, Custom); |
1179 | |
1180 | 0 | setOperationAction( |
1181 | 0 | {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom); |
1182 | | |
1183 | | // vXi64 MULHS/MULHU requires the V extension instead of Zve64*. |
1184 | 0 | if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) |
1185 | 0 | setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); |
1186 | |
1187 | 0 | setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, |
1188 | 0 | ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, |
1189 | 0 | VT, Custom); |
1190 | |
1191 | 0 | setOperationAction(ISD::VSELECT, VT, Custom); |
1192 | 0 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1193 | |
1194 | 0 | setOperationAction( |
1195 | 0 | {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom); |
1196 | | |
1197 | | // Custom-lower reduction operations to set up the corresponding custom |
1198 | | // nodes' operands. |
1199 | 0 | setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX, |
1200 | 0 | ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX, |
1201 | 0 | ISD::VECREDUCE_UMIN}, |
1202 | 0 | VT, Custom); |
1203 | |
1204 | 0 | setOperationAction(IntegerVPOps, VT, Custom); |
1205 | |
1206 | 0 | if (Subtarget.hasStdExtZvkb()) |
1207 | 0 | setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom); |
1208 | |
1209 | 0 | if (Subtarget.hasStdExtZvbb()) { |
1210 | 0 | setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, |
1211 | 0 | ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP}, |
1212 | 0 | VT, Custom); |
1213 | 0 | } else { |
1214 | | // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the |
1215 | | // range of f32. |
1216 | 0 | EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
1217 | 0 | if (isTypeLegal(FloatVT)) |
1218 | 0 | setOperationAction( |
1219 | 0 | {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, |
1220 | 0 | Custom); |
1221 | 0 | } |
1222 | 0 | } |
1223 | |
1224 | 0 | for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { |
1225 | | // There are no extending loads or truncating stores. |
1226 | 0 | for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) { |
1227 | 0 | setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
1228 | 0 | setTruncStoreAction(VT, InnerVT, Expand); |
1229 | 0 | } |
1230 | |
1231 | 0 | if (!useRVVForFixedLengthVectorVT(VT)) |
1232 | 0 | continue; |
1233 | | |
1234 | | // By default everything must be expanded. |
1235 | 0 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
1236 | 0 | setOperationAction(Op, VT, Expand); |
1237 | | |
1238 | | // Custom lower fixed vector undefs to scalable vector undefs to avoid |
1239 | | // expansion to a build_vector of 0s. |
1240 | 0 | setOperationAction(ISD::UNDEF, VT, Custom); |
1241 | |
1242 | 0 | if (VT.getVectorElementType() == MVT::f16 && |
1243 | 0 | !Subtarget.hasVInstructionsF16()) { |
1244 | 0 | setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); |
1245 | 0 | setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT, |
1246 | 0 | Custom); |
1247 | 0 | setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom); |
1248 | 0 | setOperationAction( |
1249 | 0 | {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, |
1250 | 0 | Custom); |
1251 | 0 | setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, |
1252 | 0 | ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, |
1253 | 0 | VT, Custom); |
1254 | 0 | setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, |
1255 | 0 | ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR}, |
1256 | 0 | VT, Custom); |
1257 | 0 | setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); |
1258 | 0 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1259 | 0 | MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
1260 | | // Don't promote f16 vector operations to f32 if the f32 vector type is |
1261 | | // not legal. |
1262 | | // TODO: could split the f16 vector into two vectors and do promotion. |
1263 | 0 | if (!isTypeLegal(F32VecVT)) |
1264 | 0 | continue; |
1265 | 0 | setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT); |
1266 | 0 | setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT); |
1267 | 0 | continue; |
1268 | 0 | } |
1269 | | |
1270 | | // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. |
1271 | 0 | setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, |
1272 | 0 | Custom); |
1273 | |
1274 | 0 | setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, |
1275 | 0 | ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT, |
1276 | 0 | ISD::EXTRACT_VECTOR_ELT}, |
1277 | 0 | VT, Custom); |
1278 | |
1279 | 0 | setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, |
1280 | 0 | ISD::MGATHER, ISD::MSCATTER}, |
1281 | 0 | VT, Custom); |
1282 | |
1283 | 0 | setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, |
1284 | 0 | ISD::EXPERIMENTAL_VP_STRIDED_LOAD, |
1285 | 0 | ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, |
1286 | 0 | ISD::VP_SCATTER}, |
1287 | 0 | VT, Custom); |
1288 | |
1289 | 0 | setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, |
1290 | 0 | ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, |
1291 | 0 | ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, |
1292 | 0 | ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, |
1293 | 0 | VT, Custom); |
1294 | |
1295 | 0 | setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); |
1296 | |
1297 | 0 | setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, |
1298 | 0 | ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT}, |
1299 | 0 | VT, Custom); |
1300 | |
1301 | 0 | setCondCodeAction(VFPCCToExpand, VT, Expand); |
1302 | |
1303 | 0 | setOperationAction(ISD::SETCC, VT, Custom); |
1304 | 0 | setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom); |
1305 | 0 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1306 | |
1307 | 0 | setOperationAction(ISD::BITCAST, VT, Custom); |
1308 | |
1309 | 0 | setOperationAction(FloatingPointVecReduceOps, VT, Custom); |
1310 | |
1311 | 0 | setOperationAction(FloatingPointVPOps, VT, Custom); |
1312 | |
1313 | 0 | setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT, |
1314 | 0 | Custom); |
1315 | 0 | setOperationAction( |
1316 | 0 | {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL, |
1317 | 0 | ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA, |
1318 | 0 | ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC, |
1319 | 0 | ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, |
1320 | 0 | ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, |
1321 | 0 | VT, Custom); |
1322 | 0 | } |
1323 | | |
1324 | | // Custom-legalize bitcasts from fixed-length vectors to scalar types. |
1325 | 0 | setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, |
1326 | 0 | Custom); |
1327 | 0 | if (Subtarget.hasStdExtZfhminOrZhinxmin()) |
1328 | 0 | setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
1329 | 0 | if (Subtarget.hasStdExtFOrZfinx()) |
1330 | 0 | setOperationAction(ISD::BITCAST, MVT::f32, Custom); |
1331 | 0 | if (Subtarget.hasStdExtDOrZdinx()) |
1332 | 0 | setOperationAction(ISD::BITCAST, MVT::f64, Custom); |
1333 | 0 | } |
1334 | 0 | } |
1335 | | |
1336 | 1 | if (Subtarget.hasStdExtA()) { |
1337 | 0 | setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand); |
1338 | 0 | if (RV64LegalI32 && Subtarget.is64Bit()) |
1339 | 0 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); |
1340 | 0 | } |
1341 | | |
1342 | 1 | if (Subtarget.hasForcedAtomics()) { |
1343 | | // Force __sync libcalls to be emitted for atomic rmw/cas operations. |
1344 | 0 | setOperationAction( |
1345 | 0 | {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, |
1346 | 0 | ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, |
1347 | 0 | ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, |
1348 | 0 | ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, |
1349 | 0 | XLenVT, LibCall); |
1350 | 0 | } |
1351 | | |
1352 | 1 | if (Subtarget.hasVendorXTHeadMemIdx()) { |
1353 | 0 | for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) { |
1354 | 0 | setIndexedLoadAction(im, MVT::i8, Legal); |
1355 | 0 | setIndexedStoreAction(im, MVT::i8, Legal); |
1356 | 0 | setIndexedLoadAction(im, MVT::i16, Legal); |
1357 | 0 | setIndexedStoreAction(im, MVT::i16, Legal); |
1358 | 0 | setIndexedLoadAction(im, MVT::i32, Legal); |
1359 | 0 | setIndexedStoreAction(im, MVT::i32, Legal); |
1360 | |
1361 | 0 | if (Subtarget.is64Bit()) { |
1362 | 0 | setIndexedLoadAction(im, MVT::i64, Legal); |
1363 | 0 | setIndexedStoreAction(im, MVT::i64, Legal); |
1364 | 0 | } |
1365 | 0 | } |
1366 | 0 | } |
1367 | | |
1368 | | // Function alignments. |
1369 | 1 | const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4); |
1370 | 1 | setMinFunctionAlignment(FunctionAlignment); |
1371 | | // Set preferred alignments. |
1372 | 1 | setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); |
1373 | 1 | setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); |
1374 | | |
1375 | 1 | setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, |
1376 | 1 | ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, |
1377 | 1 | ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); |
1378 | 1 | if (Subtarget.is64Bit()) |
1379 | 1 | setTargetDAGCombine(ISD::SRA); |
1380 | | |
1381 | 1 | if (Subtarget.hasStdExtFOrZfinx()) |
1382 | 0 | setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM}); |
1383 | | |
1384 | 1 | if (Subtarget.hasStdExtZbb()) |
1385 | 0 | setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}); |
1386 | | |
1387 | 1 | if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) |
1388 | 0 | setTargetDAGCombine(ISD::TRUNCATE); |
1389 | | |
1390 | 1 | if (Subtarget.hasStdExtZbkb()) |
1391 | 0 | setTargetDAGCombine(ISD::BITREVERSE); |
1392 | 1 | if (Subtarget.hasStdExtZfhminOrZhinxmin()) |
1393 | 0 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
1394 | 1 | if (Subtarget.hasStdExtFOrZfinx()) |
1395 | 0 | setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, |
1396 | 0 | ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}); |
1397 | 1 | if (Subtarget.hasVInstructions()) |
1398 | 0 | setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, |
1399 | 0 | ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, |
1400 | 0 | ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR, |
1401 | 0 | ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, |
1402 | 0 | ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL, |
1403 | 0 | ISD::INSERT_VECTOR_ELT}); |
1404 | 1 | if (Subtarget.hasVendorXTHeadMemPair()) |
1405 | 0 | setTargetDAGCombine({ISD::LOAD, ISD::STORE}); |
1406 | 1 | if (Subtarget.useRVVForFixedLengthVectors()) |
1407 | 0 | setTargetDAGCombine(ISD::BITCAST); |
1408 | | |
1409 | 1 | setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); |
1410 | 1 | setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); |
1411 | | |
1412 | | // Disable strict node mutation. |
1413 | 1 | IsStrictFPEnabled = true; |
1414 | 1 | } |
1415 | | |
1416 | | EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, |
1417 | | LLVMContext &Context, |
1418 | 110k | EVT VT) const { |
1419 | 110k | if (!VT.isVector()) |
1420 | 110k | return getPointerTy(DL); |
1421 | 0 | if (Subtarget.hasVInstructions() && |
1422 | 0 | (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) |
1423 | 0 | return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); |
1424 | 0 | return VT.changeVectorElementTypeToInteger(); |
1425 | 0 | } |
1426 | | |
1427 | 0 | MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { |
1428 | 0 | return Subtarget.getXLenVT(); |
1429 | 0 | } |
1430 | | |
1431 | | // Return false if we can lower get_vector_length to a vsetvli intrinsic. |
1432 | | bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT, |
1433 | | unsigned VF, |
1434 | 0 | bool IsScalable) const { |
1435 | 0 | if (!Subtarget.hasVInstructions()) |
1436 | 0 | return true; |
1437 | | |
1438 | 0 | if (!IsScalable) |
1439 | 0 | return true; |
1440 | | |
1441 | 0 | if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT()) |
1442 | 0 | return true; |
1443 | | |
1444 | | // Don't allow VF=1 if those types aren't legal. |
1445 | 0 | if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen()) |
1446 | 0 | return true; |
1447 | | |
1448 | | // VLEN=32 support is incomplete. |
1449 | 0 | if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) |
1450 | 0 | return true; |
1451 | | |
1452 | | // The maximum VF is for the smallest element width with LMUL=8. |
1453 | | // VF must be a power of 2. |
1454 | 0 | unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8; |
1455 | 0 | return VF > MaxVF || !isPowerOf2_32(VF); |
1456 | 0 | } |
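A minimal standalone sketch of the final VF check above, assuming RVVBitsPerBlock is 64 (as in upstream RISC-V); shouldExpandForVF is a hypothetical helper, not the LLVM hook itself. Returning true means get_vector_length gets expanded instead of being lowered to a vsetvli:

    #include <cassert>

    // Reject any VF that is not a power of two or exceeds the VF reachable
    // with the smallest element width (8 bits) at LMUL=8: (64 / 8) * 8 = 64.
    static bool shouldExpandForVF(unsigned VF) {
      const unsigned RVVBitsPerBlock = 64; // assumption, matches upstream
      unsigned MaxVF = (RVVBitsPerBlock / 8) * 8;
      bool IsPow2 = VF != 0 && (VF & (VF - 1)) == 0;
      return VF > MaxVF || !IsPow2; // true => expand, false => use vsetvli
    }

    int main() {
      assert(!shouldExpandForVF(16)); // power of two and within range
      assert(shouldExpandForVF(96));  // larger than MaxVF
      assert(shouldExpandForVF(12));  // not a power of two
    }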
1457 | | |
1458 | | bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
1459 | | const CallInst &I, |
1460 | | MachineFunction &MF, |
1461 | 0 | unsigned Intrinsic) const { |
1462 | 0 | auto &DL = I.getModule()->getDataLayout(); |
1463 | |
1464 | 0 | auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore, |
1465 | 0 | bool IsUnitStrided) { |
1466 | 0 | Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; |
1467 | 0 | Info.ptrVal = I.getArgOperand(PtrOp); |
1468 | 0 | Type *MemTy; |
1469 | 0 | if (IsStore) { |
1470 | | // Store value is the first operand. |
1471 | 0 | MemTy = I.getArgOperand(0)->getType(); |
1472 | 0 | } else { |
1473 | | // Use the return type. If it's a segment load, the return type is a struct. |
1474 | 0 | MemTy = I.getType(); |
1475 | 0 | if (MemTy->isStructTy()) |
1476 | 0 | MemTy = MemTy->getStructElementType(0); |
1477 | 0 | } |
1478 | 0 | if (!IsUnitStrided) |
1479 | 0 | MemTy = MemTy->getScalarType(); |
1480 | |
1481 | 0 | Info.memVT = getValueType(DL, MemTy); |
1482 | 0 | Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8); |
1483 | 0 | Info.size = MemoryLocation::UnknownSize; |
1484 | 0 | Info.flags |= |
1485 | 0 | IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad; |
1486 | 0 | return true; |
1487 | 0 | }; |
1488 | |
1489 | 0 | if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) |
1490 | 0 | Info.flags |= MachineMemOperand::MONonTemporal; |
1491 | |
1492 | 0 | Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I); |
1493 | 0 | switch (Intrinsic) { |
1494 | 0 | default: |
1495 | 0 | return false; |
1496 | 0 | case Intrinsic::riscv_masked_atomicrmw_xchg_i32: |
1497 | 0 | case Intrinsic::riscv_masked_atomicrmw_add_i32: |
1498 | 0 | case Intrinsic::riscv_masked_atomicrmw_sub_i32: |
1499 | 0 | case Intrinsic::riscv_masked_atomicrmw_nand_i32: |
1500 | 0 | case Intrinsic::riscv_masked_atomicrmw_max_i32: |
1501 | 0 | case Intrinsic::riscv_masked_atomicrmw_min_i32: |
1502 | 0 | case Intrinsic::riscv_masked_atomicrmw_umax_i32: |
1503 | 0 | case Intrinsic::riscv_masked_atomicrmw_umin_i32: |
1504 | 0 | case Intrinsic::riscv_masked_cmpxchg_i32: |
1505 | 0 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
1506 | 0 | Info.memVT = MVT::i32; |
1507 | 0 | Info.ptrVal = I.getArgOperand(0); |
1508 | 0 | Info.offset = 0; |
1509 | 0 | Info.align = Align(4); |
1510 | 0 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
1511 | 0 | MachineMemOperand::MOVolatile; |
1512 | 0 | return true; |
1513 | 0 | case Intrinsic::riscv_masked_strided_load: |
1514 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false, |
1515 | 0 | /*IsUnitStrided*/ false); |
1516 | 0 | case Intrinsic::riscv_masked_strided_store: |
1517 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true, |
1518 | 0 | /*IsUnitStrided*/ false); |
1519 | 0 | case Intrinsic::riscv_seg2_load: |
1520 | 0 | case Intrinsic::riscv_seg3_load: |
1521 | 0 | case Intrinsic::riscv_seg4_load: |
1522 | 0 | case Intrinsic::riscv_seg5_load: |
1523 | 0 | case Intrinsic::riscv_seg6_load: |
1524 | 0 | case Intrinsic::riscv_seg7_load: |
1525 | 0 | case Intrinsic::riscv_seg8_load: |
1526 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, |
1527 | 0 | /*IsUnitStrided*/ false); |
1528 | 0 | case Intrinsic::riscv_seg2_store: |
1529 | 0 | case Intrinsic::riscv_seg3_store: |
1530 | 0 | case Intrinsic::riscv_seg4_store: |
1531 | 0 | case Intrinsic::riscv_seg5_store: |
1532 | 0 | case Intrinsic::riscv_seg6_store: |
1533 | 0 | case Intrinsic::riscv_seg7_store: |
1534 | 0 | case Intrinsic::riscv_seg8_store: |
1535 | | // Operands are (vec, ..., vec, ptr, vl) |
1536 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1537 | 0 | /*IsStore*/ true, |
1538 | 0 | /*IsUnitStrided*/ false); |
1539 | 0 | case Intrinsic::riscv_vle: |
1540 | 0 | case Intrinsic::riscv_vle_mask: |
1541 | 0 | case Intrinsic::riscv_vleff: |
1542 | 0 | case Intrinsic::riscv_vleff_mask: |
1543 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1544 | 0 | /*IsStore*/ false, |
1545 | 0 | /*IsUnitStrided*/ true); |
1546 | 0 | case Intrinsic::riscv_vse: |
1547 | 0 | case Intrinsic::riscv_vse_mask: |
1548 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1549 | 0 | /*IsStore*/ true, |
1550 | 0 | /*IsUnitStrided*/ true); |
1551 | 0 | case Intrinsic::riscv_vlse: |
1552 | 0 | case Intrinsic::riscv_vlse_mask: |
1553 | 0 | case Intrinsic::riscv_vloxei: |
1554 | 0 | case Intrinsic::riscv_vloxei_mask: |
1555 | 0 | case Intrinsic::riscv_vluxei: |
1556 | 0 | case Intrinsic::riscv_vluxei_mask: |
1557 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1558 | 0 | /*IsStore*/ false, |
1559 | 0 | /*IsUnitStrided*/ false); |
1560 | 0 | case Intrinsic::riscv_vsse: |
1561 | 0 | case Intrinsic::riscv_vsse_mask: |
1562 | 0 | case Intrinsic::riscv_vsoxei: |
1563 | 0 | case Intrinsic::riscv_vsoxei_mask: |
1564 | 0 | case Intrinsic::riscv_vsuxei: |
1565 | 0 | case Intrinsic::riscv_vsuxei_mask: |
1566 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ 1, |
1567 | 0 | /*IsStore*/ true, |
1568 | 0 | /*IsUnitStrided*/ false); |
1569 | 0 | case Intrinsic::riscv_vlseg2: |
1570 | 0 | case Intrinsic::riscv_vlseg3: |
1571 | 0 | case Intrinsic::riscv_vlseg4: |
1572 | 0 | case Intrinsic::riscv_vlseg5: |
1573 | 0 | case Intrinsic::riscv_vlseg6: |
1574 | 0 | case Intrinsic::riscv_vlseg7: |
1575 | 0 | case Intrinsic::riscv_vlseg8: |
1576 | 0 | case Intrinsic::riscv_vlseg2ff: |
1577 | 0 | case Intrinsic::riscv_vlseg3ff: |
1578 | 0 | case Intrinsic::riscv_vlseg4ff: |
1579 | 0 | case Intrinsic::riscv_vlseg5ff: |
1580 | 0 | case Intrinsic::riscv_vlseg6ff: |
1581 | 0 | case Intrinsic::riscv_vlseg7ff: |
1582 | 0 | case Intrinsic::riscv_vlseg8ff: |
1583 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1584 | 0 | /*IsStore*/ false, |
1585 | 0 | /*IsUnitStrided*/ false); |
1586 | 0 | case Intrinsic::riscv_vlseg2_mask: |
1587 | 0 | case Intrinsic::riscv_vlseg3_mask: |
1588 | 0 | case Intrinsic::riscv_vlseg4_mask: |
1589 | 0 | case Intrinsic::riscv_vlseg5_mask: |
1590 | 0 | case Intrinsic::riscv_vlseg6_mask: |
1591 | 0 | case Intrinsic::riscv_vlseg7_mask: |
1592 | 0 | case Intrinsic::riscv_vlseg8_mask: |
1593 | 0 | case Intrinsic::riscv_vlseg2ff_mask: |
1594 | 0 | case Intrinsic::riscv_vlseg3ff_mask: |
1595 | 0 | case Intrinsic::riscv_vlseg4ff_mask: |
1596 | 0 | case Intrinsic::riscv_vlseg5ff_mask: |
1597 | 0 | case Intrinsic::riscv_vlseg6ff_mask: |
1598 | 0 | case Intrinsic::riscv_vlseg7ff_mask: |
1599 | 0 | case Intrinsic::riscv_vlseg8ff_mask: |
1600 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, |
1601 | 0 | /*IsStore*/ false, |
1602 | 0 | /*IsUnitStrided*/ false); |
1603 | 0 | case Intrinsic::riscv_vlsseg2: |
1604 | 0 | case Intrinsic::riscv_vlsseg3: |
1605 | 0 | case Intrinsic::riscv_vlsseg4: |
1606 | 0 | case Intrinsic::riscv_vlsseg5: |
1607 | 0 | case Intrinsic::riscv_vlsseg6: |
1608 | 0 | case Intrinsic::riscv_vlsseg7: |
1609 | 0 | case Intrinsic::riscv_vlsseg8: |
1610 | 0 | case Intrinsic::riscv_vloxseg2: |
1611 | 0 | case Intrinsic::riscv_vloxseg3: |
1612 | 0 | case Intrinsic::riscv_vloxseg4: |
1613 | 0 | case Intrinsic::riscv_vloxseg5: |
1614 | 0 | case Intrinsic::riscv_vloxseg6: |
1615 | 0 | case Intrinsic::riscv_vloxseg7: |
1616 | 0 | case Intrinsic::riscv_vloxseg8: |
1617 | 0 | case Intrinsic::riscv_vluxseg2: |
1618 | 0 | case Intrinsic::riscv_vluxseg3: |
1619 | 0 | case Intrinsic::riscv_vluxseg4: |
1620 | 0 | case Intrinsic::riscv_vluxseg5: |
1621 | 0 | case Intrinsic::riscv_vluxseg6: |
1622 | 0 | case Intrinsic::riscv_vluxseg7: |
1623 | 0 | case Intrinsic::riscv_vluxseg8: |
1624 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1625 | 0 | /*IsStore*/ false, |
1626 | 0 | /*IsUnitStrided*/ false); |
1627 | 0 | case Intrinsic::riscv_vlsseg2_mask: |
1628 | 0 | case Intrinsic::riscv_vlsseg3_mask: |
1629 | 0 | case Intrinsic::riscv_vlsseg4_mask: |
1630 | 0 | case Intrinsic::riscv_vlsseg5_mask: |
1631 | 0 | case Intrinsic::riscv_vlsseg6_mask: |
1632 | 0 | case Intrinsic::riscv_vlsseg7_mask: |
1633 | 0 | case Intrinsic::riscv_vlsseg8_mask: |
1634 | 0 | case Intrinsic::riscv_vloxseg2_mask: |
1635 | 0 | case Intrinsic::riscv_vloxseg3_mask: |
1636 | 0 | case Intrinsic::riscv_vloxseg4_mask: |
1637 | 0 | case Intrinsic::riscv_vloxseg5_mask: |
1638 | 0 | case Intrinsic::riscv_vloxseg6_mask: |
1639 | 0 | case Intrinsic::riscv_vloxseg7_mask: |
1640 | 0 | case Intrinsic::riscv_vloxseg8_mask: |
1641 | 0 | case Intrinsic::riscv_vluxseg2_mask: |
1642 | 0 | case Intrinsic::riscv_vluxseg3_mask: |
1643 | 0 | case Intrinsic::riscv_vluxseg4_mask: |
1644 | 0 | case Intrinsic::riscv_vluxseg5_mask: |
1645 | 0 | case Intrinsic::riscv_vluxseg6_mask: |
1646 | 0 | case Intrinsic::riscv_vluxseg7_mask: |
1647 | 0 | case Intrinsic::riscv_vluxseg8_mask: |
1648 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5, |
1649 | 0 | /*IsStore*/ false, |
1650 | 0 | /*IsUnitStrided*/ false); |
1651 | 0 | case Intrinsic::riscv_vsseg2: |
1652 | 0 | case Intrinsic::riscv_vsseg3: |
1653 | 0 | case Intrinsic::riscv_vsseg4: |
1654 | 0 | case Intrinsic::riscv_vsseg5: |
1655 | 0 | case Intrinsic::riscv_vsseg6: |
1656 | 0 | case Intrinsic::riscv_vsseg7: |
1657 | 0 | case Intrinsic::riscv_vsseg8: |
1658 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2, |
1659 | 0 | /*IsStore*/ true, |
1660 | 0 | /*IsUnitStrided*/ false); |
1661 | 0 | case Intrinsic::riscv_vsseg2_mask: |
1662 | 0 | case Intrinsic::riscv_vsseg3_mask: |
1663 | 0 | case Intrinsic::riscv_vsseg4_mask: |
1664 | 0 | case Intrinsic::riscv_vsseg5_mask: |
1665 | 0 | case Intrinsic::riscv_vsseg6_mask: |
1666 | 0 | case Intrinsic::riscv_vsseg7_mask: |
1667 | 0 | case Intrinsic::riscv_vsseg8_mask: |
1668 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1669 | 0 | /*IsStore*/ true, |
1670 | 0 | /*IsUnitStrided*/ false); |
1671 | 0 | case Intrinsic::riscv_vssseg2: |
1672 | 0 | case Intrinsic::riscv_vssseg3: |
1673 | 0 | case Intrinsic::riscv_vssseg4: |
1674 | 0 | case Intrinsic::riscv_vssseg5: |
1675 | 0 | case Intrinsic::riscv_vssseg6: |
1676 | 0 | case Intrinsic::riscv_vssseg7: |
1677 | 0 | case Intrinsic::riscv_vssseg8: |
1678 | 0 | case Intrinsic::riscv_vsoxseg2: |
1679 | 0 | case Intrinsic::riscv_vsoxseg3: |
1680 | 0 | case Intrinsic::riscv_vsoxseg4: |
1681 | 0 | case Intrinsic::riscv_vsoxseg5: |
1682 | 0 | case Intrinsic::riscv_vsoxseg6: |
1683 | 0 | case Intrinsic::riscv_vsoxseg7: |
1684 | 0 | case Intrinsic::riscv_vsoxseg8: |
1685 | 0 | case Intrinsic::riscv_vsuxseg2: |
1686 | 0 | case Intrinsic::riscv_vsuxseg3: |
1687 | 0 | case Intrinsic::riscv_vsuxseg4: |
1688 | 0 | case Intrinsic::riscv_vsuxseg5: |
1689 | 0 | case Intrinsic::riscv_vsuxseg6: |
1690 | 0 | case Intrinsic::riscv_vsuxseg7: |
1691 | 0 | case Intrinsic::riscv_vsuxseg8: |
1692 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3, |
1693 | 0 | /*IsStore*/ true, |
1694 | 0 | /*IsUnitStrided*/ false); |
1695 | 0 | case Intrinsic::riscv_vssseg2_mask: |
1696 | 0 | case Intrinsic::riscv_vssseg3_mask: |
1697 | 0 | case Intrinsic::riscv_vssseg4_mask: |
1698 | 0 | case Intrinsic::riscv_vssseg5_mask: |
1699 | 0 | case Intrinsic::riscv_vssseg6_mask: |
1700 | 0 | case Intrinsic::riscv_vssseg7_mask: |
1701 | 0 | case Intrinsic::riscv_vssseg8_mask: |
1702 | 0 | case Intrinsic::riscv_vsoxseg2_mask: |
1703 | 0 | case Intrinsic::riscv_vsoxseg3_mask: |
1704 | 0 | case Intrinsic::riscv_vsoxseg4_mask: |
1705 | 0 | case Intrinsic::riscv_vsoxseg5_mask: |
1706 | 0 | case Intrinsic::riscv_vsoxseg6_mask: |
1707 | 0 | case Intrinsic::riscv_vsoxseg7_mask: |
1708 | 0 | case Intrinsic::riscv_vsoxseg8_mask: |
1709 | 0 | case Intrinsic::riscv_vsuxseg2_mask: |
1710 | 0 | case Intrinsic::riscv_vsuxseg3_mask: |
1711 | 0 | case Intrinsic::riscv_vsuxseg4_mask: |
1712 | 0 | case Intrinsic::riscv_vsuxseg5_mask: |
1713 | 0 | case Intrinsic::riscv_vsuxseg6_mask: |
1714 | 0 | case Intrinsic::riscv_vsuxseg7_mask: |
1715 | 0 | case Intrinsic::riscv_vsuxseg8_mask: |
1716 | 0 | return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4, |
1717 | 0 | /*IsStore*/ true, |
1718 | 0 | /*IsUnitStrided*/ false); |
1719 | 0 | } |
1720 | 0 | } |
1721 | | |
1722 | | bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
1723 | | const AddrMode &AM, Type *Ty, |
1724 | | unsigned AS, |
1725 | 766k | Instruction *I) const { |
1726 | | // No global is ever allowed as a base. |
1727 | 766k | if (AM.BaseGV) |
1728 | 157k | return false; |
1729 | | |
1730 | | // RVV instructions only support register addressing. |
1731 | 608k | if (Subtarget.hasVInstructions() && isa<VectorType>(Ty)) |
1732 | 0 | return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs; |
1733 | | |
1734 | | // Require a 12-bit signed offset. |
1735 | 608k | if (!isInt<12>(AM.BaseOffs)) |
1736 | 4.51k | return false; |
1737 | | |
1738 | 604k | switch (AM.Scale) { |
1739 | 482k | case 0: // "r+i" or just "i", depending on HasBaseReg. |
1740 | 482k | break; |
1741 | 49.9k | case 1: |
1742 | 49.9k | if (!AM.HasBaseReg) // allow "r+i". |
1743 | 0 | break; |
1744 | 49.9k | return false; // disallow "r+r" or "r+r+i". |
1745 | 71.8k | default: |
1746 | 71.8k | return false; |
1747 | 604k | } |
1748 | | |
1749 | 482k | return true; |
1750 | 604k | } |
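A standalone sketch of the scalar addressing-mode rule enforced above: a base register plus a signed 12-bit offset is legal, while scaled and reg+reg forms are rejected. AddrModeSketch is a hypothetical stand-in for LLVM's AddrMode, not the real type:

    #include <cassert>
    #include <cstdint>

    struct AddrModeSketch {
      bool HasBaseReg;
      int64_t BaseOffs;
      int64_t Scale;
    };

    static bool isLegalScalarAddrMode(const AddrModeSketch &AM) {
      if (AM.BaseOffs < -2048 || AM.BaseOffs > 2047)
        return false;              // offset must fit a signed 12-bit immediate
      if (AM.Scale == 0)
        return true;               // "r+i" or plain "i"
      if (AM.Scale == 1)
        return !AM.HasBaseReg;     // still just "r+i"; "r+r" is rejected
      return false;                // any other scale is unsupported
    }

    int main() {
      assert(isLegalScalarAddrMode({true, 2047, 0}));  // e.g. lw a0, 2047(a1)
      assert(!isLegalScalarAddrMode({true, 4096, 0})); // offset too large
      assert(!isLegalScalarAddrMode({true, 0, 1}));    // reg+reg form
    }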
1751 | | |
1752 | 96.1k | bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
1753 | 96.1k | return isInt<12>(Imm); |
1754 | 96.1k | } |
1755 | | |
1756 | 10.5k | bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
1757 | 10.5k | return isInt<12>(Imm); |
1758 | 10.5k | } |
1759 | | |
1760 | | // On RV32, 64-bit integers are split into their high and low parts and held |
1761 | | // in two different registers, so the trunc is free since the low register can |
1762 | | // just be used. |
1763 | | // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of |
1764 | | // isTruncateFree? |
1765 | 0 | bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { |
1766 | 0 | if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) |
1767 | 0 | return false; |
1768 | 0 | unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); |
1769 | 0 | unsigned DestBits = DstTy->getPrimitiveSizeInBits(); |
1770 | 0 | return (SrcBits == 64 && DestBits == 32); |
1771 | 0 | } |
1772 | | |
1773 | 649k | bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { |
1774 | | // We consider i64->i32 free on RV64 since we have good selection of W |
1775 | | // instructions that make promoting operations back to i64 free in many cases. |
1776 | 649k | if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || |
1777 | 649k | !DstVT.isInteger()) |
1778 | 0 | return false; |
1779 | 649k | unsigned SrcBits = SrcVT.getSizeInBits(); |
1780 | 649k | unsigned DestBits = DstVT.getSizeInBits(); |
1781 | 649k | return (SrcBits == 64 && DestBits == 32); |
1782 | 649k | } |
1783 | | |
1784 | 23.9k | bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
1785 | | // Zexts are free if they can be combined with a load. |
1786 | | // Don't advertise i32->i64 zextload as being free for RV64. It interacts |
1787 | | // poorly with type legalization of compares preferring sext. |
1788 | 23.9k | if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
1789 | 9.23k | EVT MemVT = LD->getMemoryVT(); |
1790 | 9.23k | if ((MemVT == MVT::i8 || MemVT == MVT::i16) && |
1791 | 9.23k | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
1792 | 1.63k | LD->getExtensionType() == ISD::ZEXTLOAD)) |
1793 | 1.63k | return true; |
1794 | 9.23k | } |
1795 | | |
1796 | 22.3k | return TargetLowering::isZExtFree(Val, VT2); |
1797 | 23.9k | } |
1798 | | |
1799 | 36.0k | bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { |
1800 | 36.0k | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
1801 | 36.0k | } |
1802 | | |
1803 | 0 | bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const { |
1804 | 0 | return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32); |
1805 | 0 | } |
1806 | | |
1807 | 0 | bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { |
1808 | 0 | return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip(); |
1809 | 0 | } |
1810 | | |
1811 | 0 | bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { |
1812 | 0 | return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() || |
1813 | 0 | Subtarget.hasVendorXCVbitmanip(); |
1814 | 0 | } |
1815 | | |
1816 | | bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( |
1817 | 2 | const Instruction &AndI) const { |
1818 | | // We expect to be able to match a bit extraction instruction if the Zbs |
1819 | | // extension is supported and the mask is a power of two. However, we |
1820 | | // conservatively return false if the mask would fit in an ANDI instruction, |
1821 | | // on the basis that it's possible the sinking+duplication of the AND in |
1822 | | // CodeGenPrepare triggered by this hook wouldn't decrease the instruction |
1823 | | // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). |
1824 | 2 | if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) |
1825 | 2 | return false; |
1826 | 0 | ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); |
1827 | 0 | if (!Mask) |
1828 | 0 | return false; |
1829 | 0 | return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2(); |
1830 | 0 | } |
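A standalone sketch of just the mask test above: folding is only reported as beneficial for a power-of-two mask that does not fit a signed 12-bit ANDI immediate, so a bit-extract instruction would actually save work. maskBenefitsFromBitExtract is a hypothetical name used for illustration:

    #include <cassert>
    #include <cstdint>

    static bool maskBenefitsFromBitExtract(int64_t Mask) {
      bool IsPow2 = Mask > 0 && (Mask & (Mask - 1)) == 0;
      bool FitsSimm12 = Mask >= -2048 && Mask <= 2047;
      return IsPow2 && !FitsSimm12;
    }

    int main() {
      assert(!maskBenefitsFromBitExtract(0x400));  // ANDI can test bit 10
      assert(maskBenefitsFromBitExtract(0x1000));  // bit 12 needs BEXTI/th.tst
    }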
1831 | | |
1832 | 901 | bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { |
1833 | 901 | EVT VT = Y.getValueType(); |
1834 | | |
1835 | | // FIXME: Support vectors once we have tests. |
1836 | 901 | if (VT.isVector()) |
1837 | 0 | return false; |
1838 | | |
1839 | 901 | return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && |
1840 | 901 | !isa<ConstantSDNode>(Y); |
1841 | 901 | } |
1842 | | |
1843 | 0 | bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const { |
1844 | | // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test. |
1845 | 0 | if (Subtarget.hasStdExtZbs()) |
1846 | 0 | return X.getValueType().isScalarInteger(); |
1847 | 0 | auto *C = dyn_cast<ConstantSDNode>(Y); |
1848 | | // XTheadBs provides th.tst (similar to bexti), if Y is a constant |
1849 | 0 | if (Subtarget.hasVendorXTHeadBs()) |
1850 | 0 | return C != nullptr; |
1851 | | // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position. |
1852 | 0 | return C && C->getAPIntValue().ule(10); |
1853 | 0 | } |
1854 | | |
1855 | | bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode, |
1856 | 551k | EVT VT) const { |
1857 | | // Only enable for rvv. |
1858 | 551k | if (!VT.isVector() || !Subtarget.hasVInstructions()) |
1859 | 551k | return false; |
1860 | | |
1861 | 0 | if (VT.isFixedLengthVector() && !isTypeLegal(VT)) |
1862 | 0 | return false; |
1863 | | |
1864 | 0 | return true; |
1865 | 0 | } |
1866 | | |
1867 | | bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
1868 | 0 | Type *Ty) const { |
1869 | 0 | assert(Ty->isIntegerTy()); |
1870 | | |
1871 | 0 | unsigned BitSize = Ty->getIntegerBitWidth(); |
1872 | 0 | if (BitSize > Subtarget.getXLen()) |
1873 | 0 | return false; |
1874 | | |
1875 | | // Fast path, assume 32-bit immediates are cheap. |
1876 | 0 | int64_t Val = Imm.getSExtValue(); |
1877 | 0 | if (isInt<32>(Val)) |
1878 | 0 | return true; |
1879 | | |
1880 | | // A constant pool entry may be more aligned than the load we're trying to |
1881 | | // replace. If we don't support unaligned scalar mem, prefer the constant |
1882 | | // pool. |
1883 | | // TODO: Can the caller pass down the alignment? |
1884 | 0 | if (!Subtarget.hasFastUnalignedAccess()) |
1885 | 0 | return true; |
1886 | | |
1887 | | // Prefer to keep the load if it would require many instructions. |
1888 | | // This uses the same threshold we use for constant pools but doesn't |
1889 | | // check useConstantPoolForLargeInts. |
1890 | | // TODO: Should we keep the load only when we're definitely going to emit a |
1891 | | // constant pool? |
1892 | | |
1893 | 0 | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget); |
1894 | 0 | return Seq.size() <= Subtarget.getMaxBuildIntsCost(); |
1895 | 0 | } |
1896 | | |
1897 | | bool RISCVTargetLowering:: |
1898 | | shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
1899 | | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
1900 | | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
1901 | 2 | SelectionDAG &DAG) const { |
1902 | | // One interesting pattern that we'd want to form is 'bit extract': |
1903 | | // ((1 >> Y) & 1) ==/!= 0 |
1904 | | // But we also need to be careful not to try to reverse that fold. |
1905 | | |
1906 | | // Is this '((1 >> Y) & 1)'? |
1907 | 2 | if (XC && OldShiftOpcode == ISD::SRL && XC->isOne()) |
1908 | 0 | return false; // Keep the 'bit extract' pattern. |
1909 | | |
1910 | | // Will this be '((1 >> Y) & 1)' after the transform? |
1911 | 2 | if (NewShiftOpcode == ISD::SRL && CC->isOne()) |
1912 | 0 | return true; // Do form the 'bit extract' pattern. |
1913 | | |
1914 | | // If 'X' is a constant, and we transform, then we will immediately |
1915 | | // try to undo the fold, thus causing an endless combine loop. |
1916 | | // So only do the transform if X is not a constant. This matches the default |
1917 | | // implementation of this function. |
1918 | 2 | return !XC; |
1919 | 2 | } |
1920 | | |
1921 | 0 | bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { |
1922 | 0 | switch (Opcode) { |
1923 | 0 | case Instruction::Add: |
1924 | 0 | case Instruction::Sub: |
1925 | 0 | case Instruction::Mul: |
1926 | 0 | case Instruction::And: |
1927 | 0 | case Instruction::Or: |
1928 | 0 | case Instruction::Xor: |
1929 | 0 | case Instruction::FAdd: |
1930 | 0 | case Instruction::FSub: |
1931 | 0 | case Instruction::FMul: |
1932 | 0 | case Instruction::FDiv: |
1933 | 0 | case Instruction::ICmp: |
1934 | 0 | case Instruction::FCmp: |
1935 | 0 | return true; |
1936 | 0 | case Instruction::Shl: |
1937 | 0 | case Instruction::LShr: |
1938 | 0 | case Instruction::AShr: |
1939 | 0 | case Instruction::UDiv: |
1940 | 0 | case Instruction::SDiv: |
1941 | 0 | case Instruction::URem: |
1942 | 0 | case Instruction::SRem: |
1943 | 0 | return Operand == 1; |
1944 | 0 | default: |
1945 | 0 | return false; |
1946 | 0 | } |
1947 | 0 | } |
1948 | | |
1949 | | |
1950 | 0 | bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { |
1951 | 0 | if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) |
1952 | 0 | return false; |
1953 | | |
1954 | 0 | if (canSplatOperand(I->getOpcode(), Operand)) |
1955 | 0 | return true; |
1956 | | |
1957 | 0 | auto *II = dyn_cast<IntrinsicInst>(I); |
1958 | 0 | if (!II) |
1959 | 0 | return false; |
1960 | | |
1961 | 0 | switch (II->getIntrinsicID()) { |
1962 | 0 | case Intrinsic::fma: |
1963 | 0 | case Intrinsic::vp_fma: |
1964 | 0 | return Operand == 0 || Operand == 1; |
1965 | 0 | case Intrinsic::vp_shl: |
1966 | 0 | case Intrinsic::vp_lshr: |
1967 | 0 | case Intrinsic::vp_ashr: |
1968 | 0 | case Intrinsic::vp_udiv: |
1969 | 0 | case Intrinsic::vp_sdiv: |
1970 | 0 | case Intrinsic::vp_urem: |
1971 | 0 | case Intrinsic::vp_srem: |
1972 | 0 | return Operand == 1; |
1973 | | // These intrinsics are commutative. |
1974 | 0 | case Intrinsic::vp_add: |
1975 | 0 | case Intrinsic::vp_mul: |
1976 | 0 | case Intrinsic::vp_and: |
1977 | 0 | case Intrinsic::vp_or: |
1978 | 0 | case Intrinsic::vp_xor: |
1979 | 0 | case Intrinsic::vp_fadd: |
1980 | 0 | case Intrinsic::vp_fmul: |
1981 | 0 | case Intrinsic::vp_icmp: |
1982 | 0 | case Intrinsic::vp_fcmp: |
1983 | | // These intrinsics have 'vr' versions. |
1984 | 0 | case Intrinsic::vp_sub: |
1985 | 0 | case Intrinsic::vp_fsub: |
1986 | 0 | case Intrinsic::vp_fdiv: |
1987 | 0 | return Operand == 0 || Operand == 1; |
1988 | 0 | default: |
1989 | 0 | return false; |
1990 | 0 | } |
1991 | 0 | } |
1992 | | |
1993 | | /// Check if sinking \p I's operands to I's basic block is profitable, because |
1994 | | /// the operands can be folded into a target instruction, e.g. |
1995 | | /// splats of scalars can fold into vector instructions. |
1996 | | bool RISCVTargetLowering::shouldSinkOperands( |
1997 | 372k | Instruction *I, SmallVectorImpl<Use *> &Ops) const { |
1998 | 372k | using namespace llvm::PatternMatch; |
1999 | | |
2000 | 372k | if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) |
2001 | 372k | return false; |
2002 | | |
2003 | 0 | for (auto OpIdx : enumerate(I->operands())) { |
2004 | 0 | if (!canSplatOperand(I, OpIdx.index())) |
2005 | 0 | continue; |
2006 | | |
2007 | 0 | Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get()); |
2008 | | // Make sure we are not already sinking this operand |
2009 | 0 | if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; })) |
2010 | 0 | continue; |
2011 | | |
2012 | | // We are looking for a splat that can be sunk. |
2013 | 0 | if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), |
2014 | 0 | m_Undef(), m_ZeroMask()))) |
2015 | 0 | continue; |
2016 | | |
2017 | | // Don't sink i1 splats. |
2018 | 0 | if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1)) |
2019 | 0 | continue; |
2020 | | |
2021 | | // All uses of the shuffle should be sunk to avoid duplicating it across gpr |
2022 | | // and vector registers |
2023 | 0 | for (Use &U : Op->uses()) { |
2024 | 0 | Instruction *Insn = cast<Instruction>(U.getUser()); |
2025 | 0 | if (!canSplatOperand(Insn, U.getOperandNo())) |
2026 | 0 | return false; |
2027 | 0 | } |
2028 | | |
2029 | 0 | Ops.push_back(&Op->getOperandUse(0)); |
2030 | 0 | Ops.push_back(&OpIdx.value()); |
2031 | 0 | } |
2032 | 0 | return true; |
2033 | 0 | } |
2034 | | |
2035 | 0 | bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { |
2036 | 0 | unsigned Opc = VecOp.getOpcode(); |
2037 | | |
2038 | | // Assume target opcodes can't be scalarized. |
2039 | | // TODO - do we have any exceptions? |
2040 | 0 | if (Opc >= ISD::BUILTIN_OP_END) |
2041 | 0 | return false; |
2042 | | |
2043 | | // If the vector op is not supported, try to convert to scalar. |
2044 | 0 | EVT VecVT = VecOp.getValueType(); |
2045 | 0 | if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) |
2046 | 0 | return true; |
2047 | | |
2048 | | // If the vector op is supported, but the scalar op is not, the transform may |
2049 | | // not be worthwhile. |
2050 | | // Still permit the transform when the vector binary operation can be |
2051 | | // converted to a scalar binary operation that is custom lowered with an illegal type. |
2052 | 0 | EVT ScalarVT = VecVT.getScalarType(); |
2053 | 0 | return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) || |
2054 | 0 | isOperationCustom(Opc, ScalarVT); |
2055 | 0 | } |
2056 | | |
2057 | | bool RISCVTargetLowering::isOffsetFoldingLegal( |
2058 | 91.1k | const GlobalAddressSDNode *GA) const { |
2059 | | // In order to maximise the opportunity for common subexpression elimination, |
2060 | | // keep a separate ADD node for the global address offset instead of folding |
2061 | | // it in the global address node. Later peephole optimisations may choose to |
2062 | | // fold it back in when profitable. |
2063 | 91.1k | return false; |
2064 | 91.1k | } |
2065 | | |
2066 | | // Return one of the following: |
2067 | | // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value. |
2068 | | // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its |
2069 | | // positive counterpart, which will be materialized from the first returned |
2070 | | // element. The second returned element indicates that an FNEG should |
2071 | | // follow. |
2072 | | // (3) `{-1, _}` if there is no way FLI can be used to materialize Imm. |
2073 | | std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, |
2074 | 0 | EVT VT) const { |
2075 | 0 | if (!Subtarget.hasStdExtZfa()) |
2076 | 0 | return std::make_pair(-1, false); |
2077 | | |
2078 | 0 | bool IsSupportedVT = false; |
2079 | 0 | if (VT == MVT::f16) { |
2080 | 0 | IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh(); |
2081 | 0 | } else if (VT == MVT::f32) { |
2082 | 0 | IsSupportedVT = true; |
2083 | 0 | } else if (VT == MVT::f64) { |
2084 | 0 | assert(Subtarget.hasStdExtD() && "Expect D extension"); |
2085 | 0 | IsSupportedVT = true; |
2086 | 0 | } |
2087 | | |
2088 | 0 | if (!IsSupportedVT) |
2089 | 0 | return std::make_pair(-1, false); |
2090 | | |
2091 | 0 | int Index = RISCVLoadFPImm::getLoadFPImm(Imm); |
2092 | 0 | if (Index < 0 && Imm.isNegative()) |
2093 | | // Try the combination of its positive counterpart + FNEG. |
2094 | 0 | return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true); |
2095 | 0 | else |
2096 | 0 | return std::make_pair(Index, false); |
2097 | 0 | } |
2098 | | |
2099 | | bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
2100 | 2.61k | bool ForCodeSize) const { |
2101 | 2.61k | bool IsLegalVT = false; |
2102 | 2.61k | if (VT == MVT::f16) |
2103 | 0 | IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin(); |
2104 | 2.61k | else if (VT == MVT::f32) |
2105 | 1.35k | IsLegalVT = Subtarget.hasStdExtFOrZfinx(); |
2106 | 1.26k | else if (VT == MVT::f64) |
2107 | 1.26k | IsLegalVT = Subtarget.hasStdExtDOrZdinx(); |
2108 | 0 | else if (VT == MVT::bf16) |
2109 | 0 | IsLegalVT = Subtarget.hasStdExtZfbfmin(); |
2110 | | |
2111 | 2.61k | if (!IsLegalVT) |
2112 | 2.61k | return false; |
2113 | | |
2114 | 0 | if (getLegalZfaFPImm(Imm, VT).first >= 0) |
2115 | 0 | return true; |
2116 | | |
2117 | | // Cannot create a 64 bit floating-point immediate value for rv32. |
2118 | 0 | if (Subtarget.getXLen() < VT.getScalarSizeInBits()) { |
2119 | | // td can handle +0.0 or -0.0 already. |
2120 | | // -0.0 can be created by fmv + fneg. |
2121 | 0 | return Imm.isZero(); |
2122 | 0 | } |
2123 | | |
2124 | | // Special case: fmv + fneg |
2125 | 0 | if (Imm.isNegZero()) |
2126 | 0 | return true; |
2127 | | |
2128 | | // Building an integer and then converting requires a fmv at the end of |
2129 | | // the integer sequence. |
2130 | 0 | const int Cost = |
2131 | 0 | 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(), |
2132 | 0 | Subtarget); |
2133 | 0 | return Cost <= FPImmCost; |
2134 | 0 | } |
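A standalone sketch of the final cost comparison above: a floating-point immediate built through an integer sequence pays for one extra fmv, and is accepted only if the total stays within the -riscv-lower-fpimm-cost limit (default 2). The helper below is hypothetical and takes the integer materialization cost as an input instead of computing it:

    #include <cassert>

    static bool fpImmViaIntIsLegal(int IntMatInsns, int FPImmCostLimit = 2) {
      return 1 + IntMatInsns <= FPImmCostLimit; // +1 for the trailing fmv
    }

    int main() {
      assert(fpImmViaIntIsLegal(1));  // single-instruction constant + fmv
      assert(!fpImmViaIntIsLegal(3)); // multi-instruction constant, prefer a load
    }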
2135 | | |
2136 | | // TODO: This is very conservative. |
2137 | | bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
2138 | 0 | unsigned Index) const { |
2139 | 0 | if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) |
2140 | 0 | return false; |
2141 | | |
2142 | | // Only support extracting a fixed-length vector from a fixed-length vector for now. |
2143 | 0 | if (ResVT.isScalableVector() || SrcVT.isScalableVector()) |
2144 | 0 | return false; |
2145 | | |
2146 | 0 | unsigned ResElts = ResVT.getVectorNumElements(); |
2147 | 0 | unsigned SrcElts = SrcVT.getVectorNumElements(); |
2148 | | |
2149 | | // Conservatively only handle extracting half of a vector. |
2150 | | // TODO: Relax this. |
2151 | 0 | if ((ResElts * 2) != SrcElts) |
2152 | 0 | return false; |
2153 | | |
2154 | | // The smallest type we can slide is i8. |
2155 | | // TODO: We can extract index 0 from a mask vector without a slide. |
2156 | 0 | if (ResVT.getVectorElementType() == MVT::i1) |
2157 | 0 | return false; |
2158 | | |
2159 | | // Slide can support arbitrary index, but we only treat vslidedown.vi as |
2160 | | // cheap. |
2161 | 0 | if (Index >= 32) |
2162 | 0 | return false; |
2163 | | |
2164 | | // TODO: We can do arbitrary slidedowns, but for now only support extracting |
2165 | | // the upper half of a vector until we have more test coverage. |
2166 | 0 | return Index == 0 || Index == ResElts; |
2167 | 0 | } |
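A standalone sketch of just the size and index conditions above (the real hook additionally requires EXTRACT_SUBVECTOR to be legal or custom, fixed-length operands, and a non-i1 element type). isCheapHalfExtract is a hypothetical name:

    #include <cassert>

    static bool isCheapHalfExtract(unsigned ResElts, unsigned SrcElts,
                                   unsigned Index) {
      if (ResElts * 2 != SrcElts)
        return false;                         // only extracting exactly half
      if (Index >= 32)
        return false;                         // must fit vslidedown.vi
      return Index == 0 || Index == ResElts;  // low half or high half only
    }

    int main() {
      assert(isCheapHalfExtract(4, 8, 0));  // low half of an 8-element vector
      assert(isCheapHalfExtract(4, 8, 4));  // high half
      assert(!isCheapHalfExtract(4, 8, 2)); // unaligned middle slice
    }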
2168 | | |
2169 | | MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
2170 | | CallingConv::ID CC, |
2171 | 250k | EVT VT) const { |
2172 | | // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. |
2173 | | // We might still end up using a GPR but that will be decided based on ABI. |
2174 | 250k | if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && |
2175 | 250k | !Subtarget.hasStdExtZfhminOrZhinxmin()) |
2176 | 0 | return MVT::f32; |
2177 | | |
2178 | 250k | MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
2179 | | |
2180 | 250k | if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32) |
2181 | 0 | return MVT::i64; |
2182 | | |
2183 | 250k | return PartVT; |
2184 | 250k | } |
2185 | | |
2186 | | unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
2187 | | CallingConv::ID CC, |
2188 | 250k | EVT VT) const { |
2189 | | // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. |
2190 | | // We might still end up using a GPR but that will be decided based on ABI. |
2191 | 250k | if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() && |
2192 | 250k | !Subtarget.hasStdExtZfhminOrZhinxmin()) |
2193 | 0 | return 1; |
2194 | | |
2195 | 250k | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
2196 | 250k | } |
2197 | | |
2198 | | unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv( |
2199 | | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
2200 | 0 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
2201 | 0 | unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv( |
2202 | 0 | Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); |
2203 | |
2204 | 0 | if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32) |
2205 | 0 | IntermediateVT = MVT::i64; |
2206 | |
2207 | 0 | if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32) |
2208 | 0 | RegisterVT = MVT::i64; |
2209 | |
2210 | 0 | return NumRegs; |
2211 | 0 | } |
2212 | | |
2213 | | // Changes the condition code and swaps operands if necessary, so the SetCC |
2214 | | // operation matches one of the comparisons supported directly by branches |
2215 | | // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare |
2216 | | // with 1/-1. |
2217 | | static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, |
2218 | 1.01k | ISD::CondCode &CC, SelectionDAG &DAG) { |
2219 | | // If this is a single bit test that can't be handled by ANDI, shift the |
2220 | | // bit to be tested to the MSB and perform a signed compare with 0. |
2221 | 1.01k | if (isIntEqualitySetCC(CC) && isNullConstant(RHS) && |
2222 | 1.01k | LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && |
2223 | 1.01k | isa<ConstantSDNode>(LHS.getOperand(1))) { |
2224 | 61 | uint64_t Mask = LHS.getConstantOperandVal(1); |
2225 | 61 | if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) { |
2226 | 2 | unsigned ShAmt = 0; |
2227 | 2 | if (isPowerOf2_64(Mask)) { |
2228 | 0 | CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; |
2229 | 0 | ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); |
2230 | 2 | } else { |
2231 | 2 | ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask); |
2232 | 2 | } |
2233 | | |
2234 | 2 | LHS = LHS.getOperand(0); |
2235 | 2 | if (ShAmt != 0) |
2236 | 2 | LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, |
2237 | 2 | DAG.getConstant(ShAmt, DL, LHS.getValueType())); |
2238 | 2 | return; |
2239 | 2 | } |
2240 | 61 | } |
2241 | | |
2242 | 1.01k | if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) { |
2243 | 659 | int64_t C = RHSC->getSExtValue(); |
2244 | 659 | switch (CC) { |
2245 | 337 | default: break; |
2246 | 337 | case ISD::SETGT: |
2247 | | // Convert X > -1 to X >= 0. |
2248 | 201 | if (C == -1) { |
2249 | 80 | RHS = DAG.getConstant(0, DL, RHS.getValueType()); |
2250 | 80 | CC = ISD::SETGE; |
2251 | 80 | return; |
2252 | 80 | } |
2253 | 121 | break; |
2254 | 121 | case ISD::SETLT: |
2255 | | // Convert X < 1 to 0 >= X. |
2256 | 121 | if (C == 1) { |
2257 | 98 | RHS = LHS; |
2258 | 98 | LHS = DAG.getConstant(0, DL, RHS.getValueType()); |
2259 | 98 | CC = ISD::SETGE; |
2260 | 98 | return; |
2261 | 98 | } |
2262 | 23 | break; |
2263 | 659 | } |
2264 | 659 | } |
2265 | | |
2266 | 837 | switch (CC) { |
2267 | 497 | default: |
2268 | 497 | break; |
2269 | 497 | case ISD::SETGT: |
2270 | 216 | case ISD::SETLE: |
2271 | 293 | case ISD::SETUGT: |
2272 | 340 | case ISD::SETULE: |
2273 | 340 | CC = ISD::getSetCCSwappedOperands(CC); |
2274 | 340 | std::swap(LHS, RHS); |
2275 | 340 | break; |
2276 | 837 | } |
2277 | 837 | } |
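A standalone illustration of the single-bit-test rewrite above for the power-of-two case: testing bit 12 of a 64-bit value cannot use an ANDI immediate, so the bit is shifted up to the MSB (shamt = 64 - 1 - 12 = 51) and the branch becomes a signed comparison against zero. bitTestViaMsb is hypothetical and only mirrors the arithmetic:

    #include <cassert>
    #include <cstdint>

    static bool bitTestViaMsb(uint64_t X, unsigned Bit) {
      unsigned ShAmt = 64 - 1 - Bit;     // move the tested bit to bit 63
      return (int64_t)(X << ShAmt) < 0;  // SETLT 0 == "bit was set"
    }

    int main() {
      assert(bitTestViaMsb(0x1000, 12)); // (X & 0x1000) != 0
      assert(!bitTestViaMsb(0x0800, 12));
    }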
2278 | | |
2279 | 0 | RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { |
2280 | 0 | assert(VT.isScalableVector() && "Expecting a scalable vector type"); |
2281 | 0 | unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); |
2282 | 0 | if (VT.getVectorElementType() == MVT::i1) |
2283 | 0 | KnownSize *= 8; |
2284 | |
2285 | 0 | switch (KnownSize) { |
2286 | 0 | default: |
2287 | 0 | llvm_unreachable("Invalid LMUL."); |
2288 | 0 | case 8: |
2289 | 0 | return RISCVII::VLMUL::LMUL_F8; |
2290 | 0 | case 16: |
2291 | 0 | return RISCVII::VLMUL::LMUL_F4; |
2292 | 0 | case 32: |
2293 | 0 | return RISCVII::VLMUL::LMUL_F2; |
2294 | 0 | case 64: |
2295 | 0 | return RISCVII::VLMUL::LMUL_1; |
2296 | 0 | case 128: |
2297 | 0 | return RISCVII::VLMUL::LMUL_2; |
2298 | 0 | case 256: |
2299 | 0 | return RISCVII::VLMUL::LMUL_4; |
2300 | 0 | case 512: |
2301 | 0 | return RISCVII::VLMUL::LMUL_8; |
2302 | 0 | } |
2303 | 0 | } |
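A standalone sketch of the size-to-LMUL mapping above, assuming RVVBitsPerBlock is 64: one full block is LMUL=1, multiples are grouped registers, and smaller known sizes are fractional LMULs (i1 vectors are first scaled by 8 in the real code). lmulForKnownMinBits is a hypothetical helper:

    #include <cassert>
    #include <string>

    static std::string lmulForKnownMinBits(unsigned KnownSize) {
      switch (KnownSize) {
      case 8:   return "mf8";
      case 16:  return "mf4";
      case 32:  return "mf2";
      case 64:  return "m1";
      case 128: return "m2";
      case 256: return "m4";
      case 512: return "m8";
      default:  return "invalid";
      }
    }

    int main() {
      assert(lmulForKnownMinBits(128) == "m2"); // e.g. nxv4i32
      assert(lmulForKnownMinBits(32) == "mf2"); // e.g. nxv1i32
    }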
2304 | | |
2305 | 0 | unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { |
2306 | 0 | switch (LMul) { |
2307 | 0 | default: |
2308 | 0 | llvm_unreachable("Invalid LMUL."); |
2309 | 0 | case RISCVII::VLMUL::LMUL_F8: |
2310 | 0 | case RISCVII::VLMUL::LMUL_F4: |
2311 | 0 | case RISCVII::VLMUL::LMUL_F2: |
2312 | 0 | case RISCVII::VLMUL::LMUL_1: |
2313 | 0 | return RISCV::VRRegClassID; |
2314 | 0 | case RISCVII::VLMUL::LMUL_2: |
2315 | 0 | return RISCV::VRM2RegClassID; |
2316 | 0 | case RISCVII::VLMUL::LMUL_4: |
2317 | 0 | return RISCV::VRM4RegClassID; |
2318 | 0 | case RISCVII::VLMUL::LMUL_8: |
2319 | 0 | return RISCV::VRM8RegClassID; |
2320 | 0 | } |
2321 | 0 | } |
2322 | | |
2323 | 0 | unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { |
2324 | 0 | RISCVII::VLMUL LMUL = getLMUL(VT); |
2325 | 0 | if (LMUL == RISCVII::VLMUL::LMUL_F8 || |
2326 | 0 | LMUL == RISCVII::VLMUL::LMUL_F4 || |
2327 | 0 | LMUL == RISCVII::VLMUL::LMUL_F2 || |
2328 | 0 | LMUL == RISCVII::VLMUL::LMUL_1) { |
2329 | 0 | static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, |
2330 | 0 | "Unexpected subreg numbering"); |
2331 | 0 | return RISCV::sub_vrm1_0 + Index; |
2332 | 0 | } |
2333 | 0 | if (LMUL == RISCVII::VLMUL::LMUL_2) { |
2334 | 0 | static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, |
2335 | 0 | "Unexpected subreg numbering"); |
2336 | 0 | return RISCV::sub_vrm2_0 + Index; |
2337 | 0 | } |
2338 | 0 | if (LMUL == RISCVII::VLMUL::LMUL_4) { |
2339 | 0 | static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, |
2340 | 0 | "Unexpected subreg numbering"); |
2341 | 0 | return RISCV::sub_vrm4_0 + Index; |
2342 | 0 | } |
2343 | 0 | llvm_unreachable("Invalid vector type."); |
2344 | 0 | } |
2345 | | |
2346 | 0 | unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { |
2347 | 0 | if (VT.getVectorElementType() == MVT::i1) |
2348 | 0 | return RISCV::VRRegClassID; |
2349 | 0 | return getRegClassIDForLMUL(getLMUL(VT)); |
2350 | 0 | } |
2351 | | |
2352 | | // Attempt to decompose a subvector insert/extract between VecVT and |
2353 | | // SubVecVT via subregister indices. Returns the subregister index that |
2354 | | // can perform the subvector insert/extract with the given element index, as |
2355 | | // well as the index corresponding to any leftover subvectors that must be |
2356 | | // further inserted/extracted within the register class for SubVecVT. |
2357 | | std::pair<unsigned, unsigned> |
2358 | | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
2359 | | MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, |
2360 | 0 | const RISCVRegisterInfo *TRI) { |
2361 | 0 | static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && |
2362 | 0 | RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && |
2363 | 0 | RISCV::VRM2RegClassID > RISCV::VRRegClassID), |
2364 | 0 | "Register classes not ordered"); |
2365 | 0 | unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); |
2366 | 0 | unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); |
2367 | | // Try to compose a subregister index that takes us from the incoming |
2368 | | // LMUL>1 register class down to the outgoing one. At each step we half |
2369 | | // the LMUL: |
2370 | | // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 |
2371 | | // Note that this is not guaranteed to find a subregister index, such as |
2372 | | // when we are extracting from one VR type to another. |
2373 | 0 | unsigned SubRegIdx = RISCV::NoSubRegister; |
2374 | 0 | for (const unsigned RCID : |
2375 | 0 | {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) |
2376 | 0 | if (VecRegClassID > RCID && SubRegClassID <= RCID) { |
2377 | 0 | VecVT = VecVT.getHalfNumVectorElementsVT(); |
2378 | 0 | bool IsHi = |
2379 | 0 | InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); |
2380 | 0 | SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, |
2381 | 0 | getSubregIndexByMVT(VecVT, IsHi)); |
2382 | 0 | if (IsHi) |
2383 | 0 | InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); |
2384 | 0 | } |
2385 | 0 | return {SubRegIdx, InsertExtractIdx}; |
2386 | 0 | } |
2387 | | |
2388 | | // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar |
2389 | | // stores for those types. |
2390 | 390k | bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { |
2391 | 390k | return !Subtarget.useRVVForFixedLengthVectors() || |
2392 | 390k | (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); |
2393 | 390k | } |
2394 | | |
2395 | 0 | bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const { |
2396 | 0 | if (!ScalarTy.isSimple()) |
2397 | 0 | return false; |
2398 | 0 | switch (ScalarTy.getSimpleVT().SimpleTy) { |
2399 | 0 | case MVT::iPTR: |
2400 | 0 | return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; |
2401 | 0 | case MVT::i8: |
2402 | 0 | case MVT::i16: |
2403 | 0 | case MVT::i32: |
2404 | 0 | return true; |
2405 | 0 | case MVT::i64: |
2406 | 0 | return Subtarget.hasVInstructionsI64(); |
2407 | 0 | case MVT::f16: |
2408 | 0 | return Subtarget.hasVInstructionsF16(); |
2409 | 0 | case MVT::f32: |
2410 | 0 | return Subtarget.hasVInstructionsF32(); |
2411 | 0 | case MVT::f64: |
2412 | 0 | return Subtarget.hasVInstructionsF64(); |
2413 | 0 | default: |
2414 | 0 | return false; |
2415 | 0 | } |
2416 | 0 | } |
2417 | | |
2418 | | |
2419 | 0 | unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { |
2420 | 0 | return NumRepeatedDivisors; |
2421 | 0 | } |
2422 | | |
2423 | 0 | static SDValue getVLOperand(SDValue Op) { |
2424 | 0 | assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
2425 | 0 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && |
2426 | 0 | "Unexpected opcode"); |
2427 | 0 | bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; |
2428 | 0 | unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); |
2429 | 0 | const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = |
2430 | 0 | RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); |
2431 | 0 | if (!II) |
2432 | 0 | return SDValue(); |
2433 | 0 | return Op.getOperand(II->VLOperand + 1 + HasChain); |
2434 | 0 | } |
2435 | | |
2436 | | static bool useRVVForFixedLengthVectorVT(MVT VT, |
2437 | 0 | const RISCVSubtarget &Subtarget) { |
2438 | 0 | assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!"); |
2439 | 0 | if (!Subtarget.useRVVForFixedLengthVectors()) |
2440 | 0 | return false; |
2441 | | |
2442 | | // We only support a set of vector types with a consistent maximum fixed size |
2443 | | // across all supported vector element types to avoid legalization issues. |
2444 | | // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest |
2445 | | // fixed-length vector type we support is 1024 bytes. |
2446 | 0 | if (VT.getFixedSizeInBits() > 1024 * 8) |
2447 | 0 | return false; |
2448 | | |
2449 | 0 | unsigned MinVLen = Subtarget.getRealMinVLen(); |
2450 | |
2451 | 0 | MVT EltVT = VT.getVectorElementType(); |
2452 | | |
2453 | | // Don't use RVV for vectors we cannot scalarize if required. |
2454 | 0 | switch (EltVT.SimpleTy) { |
2455 | | // i1 is supported but has different rules. |
2456 | 0 | default: |
2457 | 0 | return false; |
2458 | 0 | case MVT::i1: |
2459 | | // Masks can only use a single register. |
2460 | 0 | if (VT.getVectorNumElements() > MinVLen) |
2461 | 0 | return false; |
2462 | 0 | MinVLen /= 8; |
2463 | 0 | break; |
2464 | 0 | case MVT::i8: |
2465 | 0 | case MVT::i16: |
2466 | 0 | case MVT::i32: |
2467 | 0 | break; |
2468 | 0 | case MVT::i64: |
2469 | 0 | if (!Subtarget.hasVInstructionsI64()) |
2470 | 0 | return false; |
2471 | 0 | break; |
2472 | 0 | case MVT::f16: |
2473 | 0 | if (!Subtarget.hasVInstructionsF16Minimal()) |
2474 | 0 | return false; |
2475 | 0 | break; |
2476 | 0 | case MVT::f32: |
2477 | 0 | if (!Subtarget.hasVInstructionsF32()) |
2478 | 0 | return false; |
2479 | 0 | break; |
2480 | 0 | case MVT::f64: |
2481 | 0 | if (!Subtarget.hasVInstructionsF64()) |
2482 | 0 | return false; |
2483 | 0 | break; |
2484 | 0 | } |
2485 | | |
2486 | | // Reject elements larger than ELEN. |
2487 | 0 | if (EltVT.getSizeInBits() > Subtarget.getELen()) |
2488 | 0 | return false; |
2489 | | |
2490 | 0 | unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); |
2491 | | // Don't use RVV for types that don't fit. |
2492 | 0 | if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) |
2493 | 0 | return false; |
2494 | | |
2495 | | // TODO: Perhaps an artificial restriction, but worth having whilst getting |
2496 | | // the base fixed length RVV support in place. |
2497 | 0 | if (!VT.isPow2VectorType()) |
2498 | 0 | return false; |
2499 | | |
2500 | 0 | return true; |
2501 | 0 | } |
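An illustrative standalone C++ sketch (not part of this file) of the LMUL check above: a fixed-length type is viable only if ceil(type bits / minimum VLEN) stays within the LMUL cap. The VLEN and cap values below are assumed examples.

#include <cassert>
#include <cstdint>

// Ceiling division, standing in for llvm::divideCeil in this sketch.
static uint64_t divCeil(uint64_t Num, uint64_t Den) { return (Num + Den - 1) / Den; }

// A fixed-length vector of TotalBits maps onto RVV registers only if the LMUL
// it would need (relative to the guaranteed minimum VLEN) stays within the cap.
static bool fitsWithinMaxLMUL(uint64_t TotalBits, uint64_t MinVLen, uint64_t MaxLMUL) {
  return divCeil(TotalBits, MinVLen) <= MaxLMUL;
}

int main() {
  assert(fitsWithinMaxLMUL(512, 128, 8));   // e.g. v16i32 with VLEN >= 128 -> LMUL 4
  assert(!fitsWithinMaxLMUL(4096, 128, 8)); // e.g. v128i32 -> LMUL 32, rejected
  return 0;
}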
2502 | | |
2503 | 0 | bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { |
2504 | 0 | return ::useRVVForFixedLengthVectorVT(VT, Subtarget); |
2505 | 0 | } |
2506 | | |
2507 | | // Return the largest legal scalable vector type that matches VT's element type. |
2508 | | static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, |
2509 | 0 | const RISCVSubtarget &Subtarget) { |
2510 | | // This may be called before legal types are setup. |
2511 | 0 | assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || |
2512 | 0 | useRVVForFixedLengthVectorVT(VT, Subtarget)) && |
2513 | 0 | "Expected legal fixed length vector!"); |
2514 | | |
2515 | 0 | unsigned MinVLen = Subtarget.getRealMinVLen(); |
2516 | 0 | unsigned MaxELen = Subtarget.getELen(); |
2517 | |
2518 | 0 | MVT EltVT = VT.getVectorElementType(); |
2519 | 0 | switch (EltVT.SimpleTy) { |
2520 | 0 | default: |
2521 | 0 | llvm_unreachable("unexpected element type for RVV container"); |
2522 | 0 | case MVT::i1: |
2523 | 0 | case MVT::i8: |
2524 | 0 | case MVT::i16: |
2525 | 0 | case MVT::i32: |
2526 | 0 | case MVT::i64: |
2527 | 0 | case MVT::f16: |
2528 | 0 | case MVT::f32: |
2529 | 0 | case MVT::f64: { |
2530 | | // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for |
2531 | | // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within |
2532 | | // each fractional LMUL we support SEW between 8 and LMUL*ELEN. |
2533 | 0 | unsigned NumElts = |
2534 | 0 | (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; |
2535 | 0 | NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen); |
2536 | 0 | assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts"); |
2537 | 0 | return MVT::getScalableVectorVT(EltVT, NumElts); |
2538 | 0 | } |
2539 | 0 | } |
2540 | 0 | } |
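A minimal standalone sketch (not from this file) of the element-count arithmetic above, assuming RVVBitsPerBlock = 64 and the example VLEN/ELEN values shown in the asserts.

#include <algorithm>
#include <cassert>

// Scalable element count chosen for a fixed-length vector: scale the fixed count
// by RVVBitsPerBlock/MinVLen, but never drop below the smallest fractional LMUL.
static unsigned containerNumElts(unsigned FixedNumElts, unsigned RVVBitsPerBlock,
                                 unsigned MinVLen, unsigned MaxELen) {
  unsigned NumElts = (FixedNumElts * RVVBitsPerBlock) / MinVLen;
  return std::max(NumElts, RVVBitsPerBlock / MaxELen);
}

int main() {
  assert(containerNumElts(8, 64, 128, 64) == 4); // v8i32 with VLEN >= 128 -> nxv4i32
  assert(containerNumElts(2, 64, 128, 64) == 1); // v2i32 -> clamped to nxv1i32
  return 0;
}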
2541 | | |
2542 | | static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, |
2543 | 0 | const RISCVSubtarget &Subtarget) { |
2544 | 0 | return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, |
2545 | 0 | Subtarget); |
2546 | 0 | } |
2547 | | |
2548 | 0 | MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { |
2549 | 0 | return ::getContainerForFixedLengthVector(*this, VT, getSubtarget()); |
2550 | 0 | } |
2551 | | |
2552 | | // Grow V to consume an entire RVV register. |
2553 | | static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
2554 | 0 | const RISCVSubtarget &Subtarget) { |
2555 | 0 | assert(VT.isScalableVector() && |
2556 | 0 | "Expected to convert into a scalable vector!"); |
2557 | 0 | assert(V.getValueType().isFixedLengthVector() && |
2558 | 0 | "Expected a fixed length vector operand!"); |
2559 | 0 | SDLoc DL(V); |
2560 | 0 | SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); |
2561 | 0 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); |
2562 | 0 | } |
2563 | | |
2564 | | // Shrink V so it's just big enough to maintain a VT's worth of data. |
2565 | | static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, |
2566 | 0 | const RISCVSubtarget &Subtarget) { |
2567 | 0 | assert(VT.isFixedLengthVector() && |
2568 | 0 | "Expected to convert into a fixed length vector!"); |
2569 | 0 | assert(V.getValueType().isScalableVector() && |
2570 | 0 | "Expected a scalable vector operand!"); |
2571 | 0 | SDLoc DL(V); |
2572 | 0 | SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); |
2573 | 0 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); |
2574 | 0 | } |
2575 | | |
2576 | | /// Return the mask type suitable for masking the provided vector type. This |
2577 | | /// is simply a vector with i1 element type and the same (possibly scalable) |
2578 | | /// element count. |
2579 | 0 | static MVT getMaskTypeFor(MVT VecVT) { |
2580 | 0 | assert(VecVT.isVector()); |
2581 | 0 | ElementCount EC = VecVT.getVectorElementCount(); |
2582 | 0 | return MVT::getVectorVT(MVT::i1, EC); |
2583 | 0 | } |
2584 | | |
2585 | | /// Creates an all-ones mask suitable for masking a vector of type VecVT with |
2586 | | /// vector length VL. |
2587 | | static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, |
2588 | 0 | SelectionDAG &DAG) { |
2589 | 0 | MVT MaskVT = getMaskTypeFor(VecVT); |
2590 | 0 | return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); |
2591 | 0 | } |
2592 | | |
2593 | | static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, |
2594 | 0 | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
2595 | | // If we know the exact VLEN, our VL is exactly equal to VLMAX, and |
2596 | | // we can't encode the AVL as an immediate, use the VLMAX encoding. |
2597 | 0 | const auto [MinVLMAX, MaxVLMAX] = |
2598 | 0 | RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); |
2599 | 0 | if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31) |
2600 | 0 | return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); |
2601 | | |
2602 | 0 | return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT()); |
2603 | 0 | } |
2604 | | |
2605 | | static std::pair<SDValue, SDValue> |
2606 | | getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, |
2607 | 0 | const RISCVSubtarget &Subtarget) { |
2608 | 0 | assert(VecVT.isScalableVector() && "Expecting a scalable vector"); |
2609 | 0 | SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); |
2610 | 0 | SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG); |
2611 | 0 | return {Mask, VL}; |
2612 | 0 | } |
2613 | | |
2614 | | static std::pair<SDValue, SDValue> |
2615 | | getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, |
2616 | 0 | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
2617 | 0 | assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); |
2618 | 0 | SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget); |
2619 | 0 | SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG); |
2620 | 0 | return {Mask, VL}; |
2621 | 0 | } |
2622 | | |
2623 | | // Gets the two common "VL" operands: an all-ones mask and the vector length. |
2624 | | // VecVT is a vector type, either fixed-length or scalable. If VecVT is |
2625 | | // fixed-length, ContainerVT is the scalable vector type it is contained in; |
2626 | | // if VecVT is scalable, ContainerVT should be the same as VecVT. |
2627 | | static std::pair<SDValue, SDValue> |
2628 | | getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, |
2629 | 0 | const RISCVSubtarget &Subtarget) { |
2630 | 0 | if (VecVT.isFixedLengthVector()) |
2631 | 0 | return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG, |
2632 | 0 | Subtarget); |
2633 | 0 | assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); |
2634 | 0 | return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget); |
2635 | 0 | } |
2636 | | |
2637 | | SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL, |
2638 | 0 | SelectionDAG &DAG) const { |
2639 | 0 | assert(VecVT.isScalableVector() && "Expected scalable vector"); |
2640 | 0 | return DAG.getElementCount(DL, Subtarget.getXLenVT(), |
2641 | 0 | VecVT.getVectorElementCount()); |
2642 | 0 | } |
2643 | | |
2644 | | std::pair<unsigned, unsigned> |
2645 | | RISCVTargetLowering::computeVLMAXBounds(MVT VecVT, |
2646 | 0 | const RISCVSubtarget &Subtarget) { |
2647 | 0 | assert(VecVT.isScalableVector() && "Expected scalable vector"); |
2648 | | |
2649 | 0 | unsigned EltSize = VecVT.getScalarSizeInBits(); |
2650 | 0 | unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); |
2651 | |
2652 | 0 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
2653 | 0 | unsigned MaxVLMAX = |
2654 | 0 | RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); |
2655 | |
2656 | 0 | unsigned VectorBitsMin = Subtarget.getRealMinVLen(); |
2657 | 0 | unsigned MinVLMAX = |
2658 | 0 | RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); |
2659 | |
2660 | 0 | return std::make_pair(MinVLMAX, MaxVLMAX); |
2661 | 0 | } |
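A sketch of the architectural relationship behind these bounds (VLMAX = LMUL * VLEN / SEW, from the RVV spec), evaluated at an assumed minimum and maximum VLEN; this is standalone illustrative C++, not the exact computeVLMAX helper.

#include <cassert>
#include <utility>

// VLMAX = LMUL * VLEN / SEW; LMUL is passed as numerator/denominator so that
// fractional LMUL (e.g. 1/2) is representable too.
static unsigned vlmax(unsigned VLen, unsigned Sew, unsigned LMulNum, unsigned LMulDen) {
  return (VLen * LMulNum) / (Sew * LMulDen);
}

int main() {
  // LMUL=1, SEW=32, VLEN anywhere in [128, 512]: VLMAX ranges from 4 to 16.
  std::pair<unsigned, unsigned> Bounds{vlmax(128, 32, 1, 1), vlmax(512, 32, 1, 1)};
  assert(Bounds.first == 4 && Bounds.second == 16);
  return 0;
}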
2662 | | |
2663 | | // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few |
2664 | | // of either is (currently) supported. This can get us into an infinite loop |
2665 | | // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR |
2666 | | // as a ..., etc. |
2667 | | // Until either (or both) of these can reliably lower any node, reporting that |
2668 | | // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks |
2669 | | // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, |
2670 | | // which is not desirable. |
2671 | | bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( |
2672 | 0 | EVT VT, unsigned DefinedValues) const { |
2673 | 0 | return false; |
2674 | 0 | } |
2675 | | |
2676 | 0 | InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const { |
2677 | | // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is |
2678 | | // implementation-defined. |
2679 | 0 | if (!VT.isVector()) |
2680 | 0 | return InstructionCost::getInvalid(); |
2681 | 0 | unsigned DLenFactor = Subtarget.getDLenFactor(); |
2682 | 0 | unsigned Cost; |
2683 | 0 | if (VT.isScalableVector()) { |
2684 | 0 | unsigned LMul; |
2685 | 0 | bool Fractional; |
2686 | 0 | std::tie(LMul, Fractional) = |
2687 | 0 | RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT)); |
2688 | 0 | if (Fractional) |
2689 | 0 | Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1; |
2690 | 0 | else |
2691 | 0 | Cost = (LMul * DLenFactor); |
2692 | 0 | } else { |
2693 | 0 | Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor); |
2694 | 0 | } |
2695 | 0 | return Cost; |
2696 | 0 | } |
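A standalone sketch (not from this file) of the reciprocal-throughput model used above: cost is counted in DLEN-wide chunks, where DLenFactor = VLEN/DLEN. The factor values are example assumptions.

#include <cassert>

// Whole LMUL m: the operation touches m * DLenFactor DLEN-sized chunks.
// Fractional LMUL 1/d (d passed as LMul): the cost shrinks with d, bottoming out at one.
static unsigned lmulCost(bool Fractional, unsigned LMul, unsigned DLenFactor) {
  if (Fractional)
    return LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
  return LMul * DLenFactor;
}

int main() {
  assert(lmulCost(false, 4, 2) == 8); // m4 with DLEN = VLEN/2 -> 8 chunks
  assert(lmulCost(true, 2, 2) == 1);  // mf2 with DLEN = VLEN/2 -> 1 chunk
  assert(lmulCost(false, 1, 1) == 1); // m1 with DLEN = VLEN -> baseline of 1
  return 0;
}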
2697 | | |
2698 | | |
2699 | | /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv |
2700 | | /// is generally quadratic in the number of vregs implied by LMUL. Note that |
2701 | | /// the operands (index and possibly mask) are handled separately. |
2702 | 0 | InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const { |
2703 | 0 | return getLMULCost(VT) * getLMULCost(VT); |
2704 | 0 | } |
2705 | | |
2706 | | /// Return the cost of a vrgather.vi (or vx) instruction for the type VT. |
2707 | | /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL, |
2708 | | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2709 | 0 | InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const { |
2710 | 0 | return getLMULCost(VT); |
2711 | 0 | } |
2712 | | |
2713 | | /// Return the cost of a vslidedown.vx or vslideup.vx instruction |
2714 | | /// for the type VT. (This does not cover the vslide1up or vslide1down |
2715 | | /// variants.) Slides may be linear in the number of vregs implied by LMUL, |
2716 | | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2717 | 0 | InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const { |
2718 | 0 | return getLMULCost(VT); |
2719 | 0 | } |
2720 | | |
2721 | | /// Return the cost of a vslidedown.vi or vslideup.vi instruction |
2722 | | /// for the type VT. (This does not cover the vslide1up or vslide1down |
2723 | | /// variants.) Slides may be linear in the number of vregs implied by LMUL, |
2724 | | /// or may track the vrgather.vv cost. It is implementation-dependent. |
2725 | 0 | InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const { |
2726 | 0 | return getLMULCost(VT); |
2727 | 0 | } |
2728 | | |
2729 | | static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, |
2730 | 0 | const RISCVSubtarget &Subtarget) { |
2731 | | // RISC-V FP-to-int conversions saturate to the destination register size, but |
2732 | | // don't produce 0 for NaN. We can use a conversion instruction and fix the |
2733 | | // NaN case with a compare and a select. |
2734 | 0 | SDValue Src = Op.getOperand(0); |
2735 | |
2736 | 0 | MVT DstVT = Op.getSimpleValueType(); |
2737 | 0 | EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
2738 | |
2739 | 0 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; |
2740 | |
2741 | 0 | if (!DstVT.isVector()) { |
2742 | | // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate |
2743 | | // the result. |
2744 | 0 | if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) || |
2745 | 0 | Src.getValueType() == MVT::bf16) { |
2746 | 0 | Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src); |
2747 | 0 | } |
2748 | |
2749 | 0 | unsigned Opc; |
2750 | 0 | if (SatVT == DstVT) |
2751 | 0 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
2752 | 0 | else if (DstVT == MVT::i64 && SatVT == MVT::i32) |
2753 | 0 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
2754 | 0 | else |
2755 | 0 | return SDValue(); |
2756 | | // FIXME: Support other SatVTs by clamping before or after the conversion. |
2757 | | |
2758 | 0 | SDLoc DL(Op); |
2759 | 0 | SDValue FpToInt = DAG.getNode( |
2760 | 0 | Opc, DL, DstVT, Src, |
2761 | 0 | DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT())); |
2762 | |
2763 | 0 | if (Opc == RISCVISD::FCVT_WU_RV64) |
2764 | 0 | FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); |
2765 | |
2766 | 0 | SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); |
2767 | 0 | return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, |
2768 | 0 | ISD::CondCode::SETUO); |
2769 | 0 | } |
2770 | | |
2771 | | // Vectors. |
2772 | | |
2773 | 0 | MVT DstEltVT = DstVT.getVectorElementType(); |
2774 | 0 | MVT SrcVT = Src.getSimpleValueType(); |
2775 | 0 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
2776 | 0 | unsigned SrcEltSize = SrcEltVT.getSizeInBits(); |
2777 | 0 | unsigned DstEltSize = DstEltVT.getSizeInBits(); |
2778 | | |
2779 | | // Only handle saturating to the destination type. |
2780 | 0 | if (SatVT != DstEltVT) |
2781 | 0 | return SDValue(); |
2782 | | |
2783 | | // FIXME: Don't support narrowing by more than 1 step for now. |
2784 | 0 | if (SrcEltSize > (2 * DstEltSize)) |
2785 | 0 | return SDValue(); |
2786 | | |
2787 | 0 | MVT DstContainerVT = DstVT; |
2788 | 0 | MVT SrcContainerVT = SrcVT; |
2789 | 0 | if (DstVT.isFixedLengthVector()) { |
2790 | 0 | DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget); |
2791 | 0 | SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); |
2792 | 0 | assert(DstContainerVT.getVectorElementCount() == |
2793 | 0 | SrcContainerVT.getVectorElementCount() && |
2794 | 0 | "Expected same element count"); |
2795 | 0 | Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); |
2796 | 0 | } |
2797 | | |
2798 | 0 | SDLoc DL(Op); |
2799 | |
2800 | 0 | auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget); |
2801 | |
2802 | 0 | SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), |
2803 | 0 | {Src, Src, DAG.getCondCode(ISD::SETNE), |
2804 | 0 | DAG.getUNDEF(Mask.getValueType()), Mask, VL}); |
2805 | | |
2806 | | // If we need to widen by more than 1 step, promote the FP type first, then |
2807 | | // do a widening convert. |
2808 | 0 | if (DstEltSize > (2 * SrcEltSize)) { |
2809 | 0 | assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!"); |
2810 | 0 | MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32); |
2811 | 0 | Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL); |
2812 | 0 | } |
2813 | | |
2814 | 0 | unsigned RVVOpc = |
2815 | 0 | IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; |
2816 | 0 | SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL); |
2817 | |
2818 | 0 | SDValue SplatZero = DAG.getNode( |
2819 | 0 | RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT), |
2820 | 0 | DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); |
2821 | 0 | Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero, |
2822 | 0 | Res, DAG.getUNDEF(DstContainerVT), VL); |
2823 | |
2824 | 0 | if (DstVT.isFixedLengthVector()) |
2825 | 0 | Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget); |
2826 | |
2827 | 0 | return Res; |
2828 | 0 | } |
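A scalar C++ sketch of the saturating FP-to-int pattern above (convert, then force NaN to 0 via an unordered self-compare); the checks are reordered slightly so the C++ stays within defined behavior, and this is illustrative only, not the DAG code itself.

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

// Saturating f32 -> i32: out-of-range inputs clamp (as the RISC-V conversion
// instructions do), and the x != x check -- true only for NaN -- yields 0.
static int32_t fptosiSat(float X) {
  if (X != X)                        // SETUO on (X, X): NaN -> 0
    return 0;
  if (X >= 2147483648.0f)            // >= 2^31 saturates to INT32_MAX
    return std::numeric_limits<int32_t>::max();
  if (X < -2147483648.0f)            // < -2^31 saturates to INT32_MIN
    return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(X);    // in range: truncate toward zero (RTZ)
}

int main() {
  assert(fptosiSat(1.9f) == 1);
  assert(fptosiSat(std::nanf("")) == 0);
  assert(fptosiSat(1e20f) == std::numeric_limits<int32_t>::max());
  return 0;
}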
2829 | | |
2830 | 0 | static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) { |
2831 | 0 | switch (Opc) { |
2832 | 0 | case ISD::FROUNDEVEN: |
2833 | 0 | case ISD::STRICT_FROUNDEVEN: |
2834 | 0 | case ISD::VP_FROUNDEVEN: |
2835 | 0 | return RISCVFPRndMode::RNE; |
2836 | 0 | case ISD::FTRUNC: |
2837 | 0 | case ISD::STRICT_FTRUNC: |
2838 | 0 | case ISD::VP_FROUNDTOZERO: |
2839 | 0 | return RISCVFPRndMode::RTZ; |
2840 | 0 | case ISD::FFLOOR: |
2841 | 0 | case ISD::STRICT_FFLOOR: |
2842 | 0 | case ISD::VP_FFLOOR: |
2843 | 0 | return RISCVFPRndMode::RDN; |
2844 | 0 | case ISD::FCEIL: |
2845 | 0 | case ISD::STRICT_FCEIL: |
2846 | 0 | case ISD::VP_FCEIL: |
2847 | 0 | return RISCVFPRndMode::RUP; |
2848 | 0 | case ISD::FROUND: |
2849 | 0 | case ISD::STRICT_FROUND: |
2850 | 0 | case ISD::VP_FROUND: |
2851 | 0 | return RISCVFPRndMode::RMM; |
2852 | 0 | case ISD::FRINT: |
2853 | 0 | return RISCVFPRndMode::DYN; |
2854 | 0 | } |
2855 | | |
2856 | 0 | return RISCVFPRndMode::Invalid; |
2857 | 0 | } |
2858 | | |
2859 | | // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND, |
2860 | | // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to |
2861 | | // the integer domain and back, taking care to avoid converting values that are |
2862 | | // NaN or already correct. |
2863 | | static SDValue |
2864 | | lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
2865 | 0 | const RISCVSubtarget &Subtarget) { |
2866 | 0 | MVT VT = Op.getSimpleValueType(); |
2867 | 0 | assert(VT.isVector() && "Unexpected type"); |
2868 | | |
2869 | 0 | SDLoc DL(Op); |
2870 | |
2871 | 0 | SDValue Src = Op.getOperand(0); |
2872 | |
2873 | 0 | MVT ContainerVT = VT; |
2874 | 0 | if (VT.isFixedLengthVector()) { |
2875 | 0 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
2876 | 0 | Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); |
2877 | 0 | } |
2878 | |
2879 | 0 | SDValue Mask, VL; |
2880 | 0 | if (Op->isVPOpcode()) { |
2881 | 0 | Mask = Op.getOperand(1); |
2882 | 0 | if (VT.isFixedLengthVector()) |
2883 | 0 | Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, |
2884 | 0 | Subtarget); |
2885 | 0 | VL = Op.getOperand(2); |
2886 | 0 | } else { |
2887 | 0 | std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
2888 | 0 | } |
2889 | | |
2890 | | // Freeze the source since we are increasing the number of uses. |
2891 | 0 | Src = DAG.getFreeze(Src); |
2892 | | |
2893 | | // We do the conversion on the absolute value and fix the sign at the end. |
2894 | 0 | SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); |
2895 | | |
2896 | | // Determine the largest integer that can be represented exactly. This and |
2897 | | // values larger than it don't have any fractional bits so don't need to |
2898 | | // be converted. |
2899 | 0 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); |
2900 | 0 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
2901 | 0 | APFloat MaxVal = APFloat(FltSem); |
2902 | 0 | MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), |
2903 | 0 | /*IsSigned*/ false, APFloat::rmNearestTiesToEven); |
2904 | 0 | SDValue MaxValNode = |
2905 | 0 | DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); |
2906 | 0 | SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, |
2907 | 0 | DAG.getUNDEF(ContainerVT), MaxValNode, VL); |
2908 | | |
2909 | | // If abs(Src) was larger than MaxVal or NaN, keep it. |
2910 | 0 | MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
2911 | 0 | Mask = |
2912 | 0 | DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, |
2913 | 0 | {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), |
2914 | 0 | Mask, Mask, VL}); |
2915 | | |
2916 | | // Truncate to integer and convert back to FP. |
2917 | 0 | MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); |
2918 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
2919 | 0 | SDValue Truncated; |
2920 | |
2921 | 0 | switch (Op.getOpcode()) { |
2922 | 0 | default: |
2923 | 0 | llvm_unreachable("Unexpected opcode"); |
2924 | 0 | case ISD::FCEIL: |
2925 | 0 | case ISD::VP_FCEIL: |
2926 | 0 | case ISD::FFLOOR: |
2927 | 0 | case ISD::VP_FFLOOR: |
2928 | 0 | case ISD::FROUND: |
2929 | 0 | case ISD::FROUNDEVEN: |
2930 | 0 | case ISD::VP_FROUND: |
2931 | 0 | case ISD::VP_FROUNDEVEN: |
2932 | 0 | case ISD::VP_FROUNDTOZERO: { |
2933 | 0 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); |
2934 | 0 | assert(FRM != RISCVFPRndMode::Invalid); |
2935 | 0 | Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask, |
2936 | 0 | DAG.getTargetConstant(FRM, DL, XLenVT), VL); |
2937 | 0 | break; |
2938 | 0 | } |
2939 | 0 | case ISD::FTRUNC: |
2940 | 0 | Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src, |
2941 | 0 | Mask, VL); |
2942 | 0 | break; |
2943 | 0 | case ISD::FRINT: |
2944 | 0 | case ISD::VP_FRINT: |
2945 | 0 | Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL); |
2946 | 0 | break; |
2947 | 0 | case ISD::FNEARBYINT: |
2948 | 0 | case ISD::VP_FNEARBYINT: |
2949 | 0 | Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src, |
2950 | 0 | Mask, VL); |
2951 | 0 | break; |
2952 | 0 | } |
2953 | | |
2954 | | // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. |
2955 | 0 | if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL) |
2956 | 0 | Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated, |
2957 | 0 | Mask, VL); |
2958 | | |
2959 | | // Restore the original sign so that -0.0 is preserved. |
2960 | 0 | Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, |
2961 | 0 | Src, Src, Mask, VL); |
2962 | |
2963 | 0 | if (!VT.isFixedLengthVector()) |
2964 | 0 | return Truncated; |
2965 | | |
2966 | 0 | return convertFromScalableVector(VT, Truncated, DAG, Subtarget); |
2967 | 0 | } |
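A scalar sketch of the convert-to-integer-and-back trick used above, assuming IEEE single precision (p = 24, so 2^23 is the smallest magnitude with no fractional bits); it implements floor only and is illustrative, not the vector lowering itself.

#include <cassert>
#include <cmath>
#include <cstdint>

static float floorViaInt(float X) {
  const float MaxExact = 8388608.0f;  // 2^(p-1) = 2^23: this and larger are integral
  if (!(std::fabs(X) < MaxExact))     // also false for NaN, so NaN passes through
    return X;
  float R = static_cast<float>(static_cast<int64_t>(X)); // truncate toward zero
  if (R > X)                          // adjust negatives to round toward -inf
    R -= 1.0f;
  return std::copysign(R, X);         // restore the sign so -0.0 is preserved
}

int main() {
  assert(floorViaInt(2.7f) == 2.0f);
  assert(floorViaInt(-2.1f) == -3.0f);
  assert(floorViaInt(16777216.0f) == 16777216.0f); // already integral: untouched
  assert(std::signbit(floorViaInt(-0.0f)));        // -0.0 survives the round trip
  return 0;
}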
2968 | | |
2969 | | // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND |
2970 | | // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN elements of the |
2971 | | // source to qNaN and converting the new source to integer and back to FP. |
2972 | | static SDValue |
2973 | | lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
2974 | 0 | const RISCVSubtarget &Subtarget) { |
2975 | 0 | SDLoc DL(Op); |
2976 | 0 | MVT VT = Op.getSimpleValueType(); |
2977 | 0 | SDValue Chain = Op.getOperand(0); |
2978 | 0 | SDValue Src = Op.getOperand(1); |
2979 | |
2980 | 0 | MVT ContainerVT = VT; |
2981 | 0 | if (VT.isFixedLengthVector()) { |
2982 | 0 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
2983 | 0 | Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); |
2984 | 0 | } |
2985 | |
2986 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
2987 | | |
2988 | | // Freeze the source since we are increasing the number of uses. |
2989 | 0 | Src = DAG.getFreeze(Src); |
2990 | | |
2991 | | // Convert sNaN to qNaN by executing x + x for each unordered element x in Src. |
2992 | 0 | MVT MaskVT = Mask.getSimpleValueType(); |
2993 | 0 | SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL, |
2994 | 0 | DAG.getVTList(MaskVT, MVT::Other), |
2995 | 0 | {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE), |
2996 | 0 | DAG.getUNDEF(MaskVT), Mask, VL}); |
2997 | 0 | Chain = Unorder.getValue(1); |
2998 | 0 | Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL, |
2999 | 0 | DAG.getVTList(ContainerVT, MVT::Other), |
3000 | 0 | {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL}); |
3001 | 0 | Chain = Src.getValue(1); |
3002 | | |
3003 | | // We do the conversion on the absolute value and fix the sign at the end. |
3004 | 0 | SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); |
3005 | | |
3006 | | // Determine the largest integer that can be represented exactly. This and |
3007 | | // values larger than it don't have any fractional bits so don't need to |
3008 | | // be converted. |
3009 | 0 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); |
3010 | 0 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
3011 | 0 | APFloat MaxVal = APFloat(FltSem); |
3012 | 0 | MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), |
3013 | 0 | /*IsSigned*/ false, APFloat::rmNearestTiesToEven); |
3014 | 0 | SDValue MaxValNode = |
3015 | 0 | DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); |
3016 | 0 | SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, |
3017 | 0 | DAG.getUNDEF(ContainerVT), MaxValNode, VL); |
3018 | | |
3019 | | // If abs(Src) was larger than MaxVal or NaN, keep it. |
3020 | 0 | Mask = DAG.getNode( |
3021 | 0 | RISCVISD::SETCC_VL, DL, MaskVT, |
3022 | 0 | {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL}); |
3023 | | |
3024 | | // Truncate to integer and convert back to FP. |
3025 | 0 | MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); |
3026 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
3027 | 0 | SDValue Truncated; |
3028 | |
|
3029 | 0 | switch (Op.getOpcode()) { |
3030 | 0 | default: |
3031 | 0 | llvm_unreachable("Unexpected opcode"); |
3032 | 0 | case ISD::STRICT_FCEIL: |
3033 | 0 | case ISD::STRICT_FFLOOR: |
3034 | 0 | case ISD::STRICT_FROUND: |
3035 | 0 | case ISD::STRICT_FROUNDEVEN: { |
3036 | 0 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); |
3037 | 0 | assert(FRM != RISCVFPRndMode::Invalid); |
3038 | 0 | Truncated = DAG.getNode( |
3039 | 0 | RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other), |
3040 | 0 | {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL}); |
3041 | 0 | break; |
3042 | 0 | } |
3043 | 0 | case ISD::STRICT_FTRUNC: |
3044 | 0 | Truncated = |
3045 | 0 | DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL, |
3046 | 0 | DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL); |
3047 | 0 | break; |
3048 | 0 | case ISD::STRICT_FNEARBYINT: |
3049 | 0 | Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL, |
3050 | 0 | DAG.getVTList(ContainerVT, MVT::Other), Chain, Src, |
3051 | 0 | Mask, VL); |
3052 | 0 | break; |
3053 | 0 | } |
3054 | 0 | Chain = Truncated.getValue(1); |
3055 | | |
3056 | | // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. |
3057 | 0 | if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) { |
3058 | 0 | Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL, |
3059 | 0 | DAG.getVTList(ContainerVT, MVT::Other), Chain, |
3060 | 0 | Truncated, Mask, VL); |
3061 | 0 | Chain = Truncated.getValue(1); |
3062 | 0 | } |
3063 | | |
3064 | | // Restore the original sign so that -0.0 is preserved. |
3065 | 0 | Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, |
3066 | 0 | Src, Src, Mask, VL); |
3067 | |
3068 | 0 | if (VT.isFixedLengthVector()) |
3069 | 0 | Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget); |
3070 | 0 | return DAG.getMergeValues({Truncated, Chain}, DL); |
3071 | 0 | } |
3072 | | |
3073 | | static SDValue |
3074 | | lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, |
3075 | 0 | const RISCVSubtarget &Subtarget) { |
3076 | 0 | MVT VT = Op.getSimpleValueType(); |
3077 | 0 | if (VT.isVector()) |
3078 | 0 | return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
3079 | | |
3080 | 0 | if (DAG.shouldOptForSize()) |
3081 | 0 | return SDValue(); |
3082 | | |
3083 | 0 | SDLoc DL(Op); |
3084 | 0 | SDValue Src = Op.getOperand(0); |
3085 | | |
3086 | | // Create an integer the size of the mantissa with the MSB set. This and all |
3087 | | // values larger than it don't have any fractional bits so don't need to be |
3088 | | // converted. |
3089 | 0 | const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); |
3090 | 0 | unsigned Precision = APFloat::semanticsPrecision(FltSem); |
3091 | 0 | APFloat MaxVal = APFloat(FltSem); |
3092 | 0 | MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), |
3093 | 0 | /*IsSigned*/ false, APFloat::rmNearestTiesToEven); |
3094 | 0 | SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); |
3095 | |
3096 | 0 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); |
3097 | 0 | return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode, |
3098 | 0 | DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); |
3099 | 0 | } |
3100 | | |
3101 | | // Expand vector LRINT and LLRINT by converting to the integer domain. |
3102 | | static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, |
3103 | 0 | const RISCVSubtarget &Subtarget) { |
3104 | 0 | MVT VT = Op.getSimpleValueType(); |
3105 | 0 | assert(VT.isVector() && "Unexpected type"); |
3106 | | |
3107 | 0 | SDLoc DL(Op); |
3108 | 0 | SDValue Src = Op.getOperand(0); |
3109 | 0 | MVT ContainerVT = VT; |
3110 | |
3111 | 0 | if (VT.isFixedLengthVector()) { |
3112 | 0 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3113 | 0 | Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); |
3114 | 0 | } |
3115 | |
3116 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
3117 | 0 | SDValue Truncated = |
3118 | 0 | DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL); |
3119 | |
3120 | 0 | if (!VT.isFixedLengthVector()) |
3121 | 0 | return Truncated; |
3122 | | |
3123 | 0 | return convertFromScalableVector(VT, Truncated, DAG, Subtarget); |
3124 | 0 | } |
3125 | | |
3126 | | static SDValue |
3127 | | getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, |
3128 | | const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, |
3129 | | SDValue Offset, SDValue Mask, SDValue VL, |
3130 | 0 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { |
3131 | 0 | if (Merge.isUndef()) |
3132 | 0 | Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
3133 | 0 | SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); |
3134 | 0 | SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; |
3135 | 0 | return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops); |
3136 | 0 | } |
3137 | | |
3138 | | static SDValue |
3139 | | getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, |
3140 | | EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, |
3141 | | SDValue VL, |
3142 | 0 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { |
3143 | 0 | if (Merge.isUndef()) |
3144 | 0 | Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
3145 | 0 | SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); |
3146 | 0 | SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; |
3147 | 0 | return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops); |
3148 | 0 | } |
3149 | | |
3150 | 0 | static MVT getLMUL1VT(MVT VT) { |
3151 | 0 | assert(VT.getVectorElementType().getSizeInBits() <= 64 && |
3152 | 0 | "Unexpected vector MVT"); |
3153 | 0 | return MVT::getScalableVectorVT( |
3154 | 0 | VT.getVectorElementType(), |
3155 | 0 | RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); |
3156 | 0 | } |
3157 | | |
3158 | | struct VIDSequence { |
3159 | | int64_t StepNumerator; |
3160 | | unsigned StepDenominator; |
3161 | | int64_t Addend; |
3162 | | }; |
3163 | | |
3164 | | static std::optional<uint64_t> getExactInteger(const APFloat &APF, |
3165 | 0 | uint32_t BitWidth) { |
3166 | 0 | APSInt ValInt(BitWidth, !APF.isNegative()); |
3167 | | // We use an arbitrary rounding mode here. If a floating-point value is an exact |
3168 | | // integer (e.g., 1.0), the rounding mode does not affect the output value. If |
3169 | | // the rounding mode changes the output value, then it is not an exact |
3170 | | // integer. |
3171 | 0 | RoundingMode ArbitraryRM = RoundingMode::TowardZero; |
3172 | 0 | bool IsExact; |
3173 | | // If it is out of signed integer range, it will return an invalid operation. |
3174 | | // If it is not an exact integer, IsExact is false. |
3175 | 0 | if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) == |
3176 | 0 | APFloatBase::opInvalidOp) || |
3177 | 0 | !IsExact) |
3178 | 0 | return std::nullopt; |
3179 | 0 | return ValInt.extractBitsAsZExtValue(BitWidth, 0); |
3180 | 0 | } |
3181 | | |
3182 | | // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] |
3183 | | // to the (non-zero) step S and start value X. This can be then lowered as the |
3184 | | // RVV sequence (VID * S) + X, for example. |
3185 | | // The step S is represented as an integer numerator divided by a positive |
3186 | | // denominator. Note that the implementation currently only identifies |
3187 | | // sequences in which either the numerator is +/- 1 or the denominator is 1. It |
3188 | | // cannot detect 2/3, for example. |
3189 | | // Note that this method will also match potentially unappealing index |
3190 | | // sequences, like <i32 0, i32 50939494>, however it is left to the caller to |
3191 | | // determine whether this is worth generating code for. |
3192 | 0 | static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) { |
3193 | 0 | unsigned NumElts = Op.getNumOperands(); |
3194 | 0 | assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); |
3195 | 0 | bool IsInteger = Op.getValueType().isInteger(); |
3196 | |
3197 | 0 | std::optional<unsigned> SeqStepDenom; |
3198 | 0 | std::optional<int64_t> SeqStepNum, SeqAddend; |
3199 | 0 | std::optional<std::pair<uint64_t, unsigned>> PrevElt; |
3200 | 0 | unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits(); |
3201 | 0 | for (unsigned Idx = 0; Idx < NumElts; Idx++) { |
3202 | | // Assume undef elements match the sequence; we just have to be careful |
3203 | | // when interpolating across them. |
3204 | 0 | if (Op.getOperand(Idx).isUndef()) |
3205 | 0 | continue; |
3206 | | |
3207 | 0 | uint64_t Val; |
3208 | 0 | if (IsInteger) { |
3209 | | // The BUILD_VECTOR must be all constants. |
3210 | 0 | if (!isa<ConstantSDNode>(Op.getOperand(Idx))) |
3211 | 0 | return std::nullopt; |
3212 | 0 | Val = Op.getConstantOperandVal(Idx) & |
3213 | 0 | maskTrailingOnes<uint64_t>(EltSizeInBits); |
3214 | 0 | } else { |
3215 | | // The BUILD_VECTOR must be all constants. |
3216 | 0 | if (!isa<ConstantFPSDNode>(Op.getOperand(Idx))) |
3217 | 0 | return std::nullopt; |
3218 | 0 | if (auto ExactInteger = getExactInteger( |
3219 | 0 | cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), |
3220 | 0 | EltSizeInBits)) |
3221 | 0 | Val = *ExactInteger; |
3222 | 0 | else |
3223 | 0 | return std::nullopt; |
3224 | 0 | } |
3225 | | |
3226 | 0 | if (PrevElt) { |
3227 | | // Calculate the step since the last non-undef element, and ensure |
3228 | | // it's consistent across the entire sequence. |
3229 | 0 | unsigned IdxDiff = Idx - PrevElt->second; |
3230 | 0 | int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits); |
3231 | | |
3232 | | // A zero value difference means that we're somewhere in the middle |
3233 | | // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a |
3234 | | // step change before evaluating the sequence. |
3235 | 0 | if (ValDiff == 0) |
3236 | 0 | continue; |
3237 | | |
3238 | 0 | int64_t Remainder = ValDiff % IdxDiff; |
3239 | | // Normalize the step if it's greater than 1. |
3240 | 0 | if (Remainder != ValDiff) { |
3241 | | // The difference must cleanly divide the element span. |
3242 | 0 | if (Remainder != 0) |
3243 | 0 | return std::nullopt; |
3244 | 0 | ValDiff /= IdxDiff; |
3245 | 0 | IdxDiff = 1; |
3246 | 0 | } |
3247 | | |
3248 | 0 | if (!SeqStepNum) |
3249 | 0 | SeqStepNum = ValDiff; |
3250 | 0 | else if (ValDiff != SeqStepNum) |
3251 | 0 | return std::nullopt; |
3252 | | |
3253 | 0 | if (!SeqStepDenom) |
3254 | 0 | SeqStepDenom = IdxDiff; |
3255 | 0 | else if (IdxDiff != *SeqStepDenom) |
3256 | 0 | return std::nullopt; |
3257 | 0 | } |
3258 | | |
3259 | | // Record this non-undef element for later. |
3260 | 0 | if (!PrevElt || PrevElt->first != Val) |
3261 | 0 | PrevElt = std::make_pair(Val, Idx); |
3262 | 0 | } |
3263 | | |
3264 | | // We need to have logged a step for this to count as a legal index sequence. |
3265 | 0 | if (!SeqStepNum || !SeqStepDenom) |
3266 | 0 | return std::nullopt; |
3267 | | |
3268 | | // Loop back through the sequence and validate elements we might have skipped |
3269 | | // while waiting for a valid step. While doing this, log any sequence addend. |
3270 | 0 | for (unsigned Idx = 0; Idx < NumElts; Idx++) { |
3271 | 0 | if (Op.getOperand(Idx).isUndef()) |
3272 | 0 | continue; |
3273 | 0 | uint64_t Val; |
3274 | 0 | if (IsInteger) { |
3275 | 0 | Val = Op.getConstantOperandVal(Idx) & |
3276 | 0 | maskTrailingOnes<uint64_t>(EltSizeInBits); |
3277 | 0 | } else { |
3278 | 0 | Val = *getExactInteger( |
3279 | 0 | cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), |
3280 | 0 | EltSizeInBits); |
3281 | 0 | } |
3282 | 0 | uint64_t ExpectedVal = |
3283 | 0 | (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; |
3284 | 0 | int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); |
3285 | 0 | if (!SeqAddend) |
3286 | 0 | SeqAddend = Addend; |
3287 | 0 | else if (Addend != SeqAddend) |
3288 | 0 | return std::nullopt; |
3289 | 0 | } |
3290 | | |
3291 | 0 | assert(SeqAddend && "Must have an addend if we have a step"); |
3292 | | |
3293 | 0 | return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; |
3294 | 0 | } |
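A reduced standalone sketch of the matcher above for plain integer sequences: every element must equal (i * Num) / Den + Addend. It skips the undef and floating-point handling of the real code, and the test values are illustrative.

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

struct VIDSeq { int64_t StepNumerator; unsigned StepDenominator; int64_t Addend; };

// Derive a candidate (Num, Den, Addend) from the first step change, then verify
// V[i] == (i * Num) / Den + Addend for every index.
static std::optional<VIDSeq> matchVID(const std::vector<int64_t> &V) {
  if (V.size() < 2)
    return std::nullopt;
  unsigned I = 1;
  while (I < V.size() && V[I] == V[0])  // skip the flat prefix of a fractional step
    ++I;
  if (I == V.size())
    return std::nullopt;
  int64_t Num = V[I] - V[0];
  unsigned Den = I;
  int64_t Addend = V[0];
  for (unsigned J = 0; J < V.size(); ++J)
    if (V[J] != (int64_t)(J * Num) / (int64_t)Den + Addend)
      return std::nullopt;
  return VIDSeq{Num, Den, Addend};
}

int main() {
  auto A = matchVID({1, 3, 5, 7});        // step 2/1, addend 1
  assert(A && A->StepNumerator == 2 && A->StepDenominator == 1 && A->Addend == 1);
  auto B = matchVID({0, 0, 1, 1, 2, 2});  // step 1/2, addend 0
  assert(B && B->StepNumerator == 1 && B->StepDenominator == 2);
  assert(!matchVID({0, 1, 4, 9}));        // not an arithmetic sequence
  return 0;
}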
3295 | | |
3296 | | // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT |
3297 | | // and lower it as a VRGATHER_VX_VL from the source vector. |
3298 | | static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, |
3299 | | SelectionDAG &DAG, |
3300 | 0 | const RISCVSubtarget &Subtarget) { |
3301 | 0 | if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
3302 | 0 | return SDValue(); |
3303 | 0 | SDValue Vec = SplatVal.getOperand(0); |
3304 | | // Only perform this optimization on vectors of the same size for simplicity. |
3305 | | // Don't perform this optimization for i1 vectors. |
3306 | | // FIXME: Support i1 vectors, maybe by promoting to i8? |
3307 | 0 | if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) |
3308 | 0 | return SDValue(); |
3309 | 0 | SDValue Idx = SplatVal.getOperand(1); |
3310 | | // The index must be a legal type. |
3311 | 0 | if (Idx.getValueType() != Subtarget.getXLenVT()) |
3312 | 0 | return SDValue(); |
3313 | | |
3314 | 0 | MVT ContainerVT = VT; |
3315 | 0 | if (VT.isFixedLengthVector()) { |
3316 | 0 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3317 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
3318 | 0 | } |
3319 | |
3320 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
3321 | |
3322 | 0 | SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec, |
3323 | 0 | Idx, DAG.getUNDEF(ContainerVT), Mask, VL); |
3324 | |
3325 | 0 | if (!VT.isFixedLengthVector()) |
3326 | 0 | return Gather; |
3327 | | |
3328 | 0 | return convertFromScalableVector(VT, Gather, DAG, Subtarget); |
3329 | 0 | } |
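A tiny standalone sketch of the rewrite performed above: splatting one extracted element is the same as a gather whose every index is that element's position (the vrgather.vx shape). Illustrative only.

#include <cassert>
#include <vector>

// splat(extract(Src, Idx)) == gather(Src, {Idx, Idx, ..., Idx})
static std::vector<int> splatOfElement(const std::vector<int> &Src, unsigned Idx) {
  return std::vector<int>(Src.size(), Src[Idx]); // one source element, broadcast
}

int main() {
  std::vector<int> Src{4, 9, 16, 25};
  assert(splatOfElement(Src, 2) == std::vector<int>(Src.size(), 16));
  return 0;
}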
3330 | | |
3331 | | |
3332 | | /// Try and optimize BUILD_VECTORs with "dominant values" - these are values |
3333 | | /// which constitute a large proportion of the elements. In such cases we can |
3334 | | /// splat a vector with the dominant element and make up the shortfall with |
3335 | | /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable. |
3336 | | /// Note that this includes vectors of 2 elements by association. The |
3337 | | /// upper-most element is the "dominant" one, allowing us to use a splat to |
3338 | | /// "insert" the upper element, and an insert of the lower element at position |
3339 | | /// 0, which improves codegen. |
3340 | | static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, |
3341 | 0 | const RISCVSubtarget &Subtarget) { |
3342 | 0 | MVT VT = Op.getSimpleValueType(); |
3343 | 0 | assert(VT.isFixedLengthVector() && "Unexpected vector!"); |
3344 | | |
3345 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3346 | |
3347 | 0 | SDLoc DL(Op); |
3348 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
3349 | |
3350 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
3351 | 0 | unsigned NumElts = Op.getNumOperands(); |
3352 | |
3353 | 0 | SDValue DominantValue; |
3354 | 0 | unsigned MostCommonCount = 0; |
3355 | 0 | DenseMap<SDValue, unsigned> ValueCounts; |
3356 | 0 | unsigned NumUndefElts = |
3357 | 0 | count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); |
3358 | | |
3359 | | // Track the number of scalar loads we know we'd be inserting, estimated as |
3360 | | // any non-zero floating-point constant. Other kinds of elements are either |
3361 | | // already in registers or are materialized on demand. The threshold at which |
3362 | | // a vector load is more desirable than several scalar materialization and |
3363 | | // vector-insertion instructions is not known. |
3364 | 0 | unsigned NumScalarLoads = 0; |
3365 | |
3366 | 0 | for (SDValue V : Op->op_values()) { |
3367 | 0 | if (V.isUndef()) |
3368 | 0 | continue; |
3369 | | |
3370 | 0 | ValueCounts.insert(std::make_pair(V, 0)); |
3371 | 0 | unsigned &Count = ValueCounts[V]; |
3372 | 0 | if (0 == Count) |
3373 | 0 | if (auto *CFP = dyn_cast<ConstantFPSDNode>(V)) |
3374 | 0 | NumScalarLoads += !CFP->isExactlyValue(+0.0); |
3375 | | |
3376 | | // Is this value dominant? In case of a tie, prefer the highest element as |
3377 | | // it's cheaper to insert near the beginning of a vector than it is at the |
3378 | | // end. |
3379 | 0 | if (++Count >= MostCommonCount) { |
3380 | 0 | DominantValue = V; |
3381 | 0 | MostCommonCount = Count; |
3382 | 0 | } |
3383 | 0 | } |
3384 | |
3385 | 0 | assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); |
3386 | 0 | unsigned NumDefElts = NumElts - NumUndefElts; |
3387 | 0 | unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; |
3388 | | |
3389 | | // Don't perform this optimization when optimizing for size, since |
3390 | | // materializing elements and inserting them tends to cause code bloat. |
3391 | 0 | if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && |
3392 | 0 | (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) && |
3393 | 0 | ((MostCommonCount > DominantValueCountThreshold) || |
3394 | 0 | (ValueCounts.size() <= Log2_32(NumDefElts)))) { |
3395 | | // Start by splatting the most common element. |
3396 | 0 | SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); |
3397 | |
3398 | 0 | DenseSet<SDValue> Processed{DominantValue}; |
3399 | | |
3400 | | // We can handle an insert into the last element (of a splat) via |
3401 | | // v(f)slide1down. This is slightly better than the vslideup insert |
3402 | | // lowering as it avoids the need for a vector group temporary. It |
3403 | | // is also better than using vmerge.vx as it avoids the need to |
3404 | | // materialize the mask in a vector register. |
3405 | 0 | if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1); |
3406 | 0 | !LastOp.isUndef() && ValueCounts[LastOp] == 1 && |
3407 | 0 | LastOp != DominantValue) { |
3408 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
3409 | 0 | auto OpCode = |
3410 | 0 | VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; |
3411 | 0 | if (!VT.isFloatingPoint()) |
3412 | 0 | LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp); |
3413 | 0 | Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, |
3414 | 0 | LastOp, Mask, VL); |
3415 | 0 | Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget); |
3416 | 0 | Processed.insert(LastOp); |
3417 | 0 | } |
3418 | |
3419 | 0 | MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); |
3420 | 0 | for (const auto &OpIdx : enumerate(Op->ops())) { |
3421 | 0 | const SDValue &V = OpIdx.value(); |
3422 | 0 | if (V.isUndef() || !Processed.insert(V).second) |
3423 | 0 | continue; |
3424 | 0 | if (ValueCounts[V] == 1) { |
3425 | 0 | Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, |
3426 | 0 | DAG.getConstant(OpIdx.index(), DL, XLenVT)); |
3427 | 0 | } else { |
3428 | | // Blend in all instances of this value using a VSELECT, using a |
3429 | | // mask where each bit signals whether that element is the one |
3430 | | // we're after. |
3431 | 0 | SmallVector<SDValue> Ops; |
3432 | 0 | transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { |
3433 | 0 | return DAG.getConstant(V == V1, DL, XLenVT); |
3434 | 0 | }); |
3435 | 0 | Vec = DAG.getNode(ISD::VSELECT, DL, VT, |
3436 | 0 | DAG.getBuildVector(SelMaskTy, DL, Ops), |
3437 | 0 | DAG.getSplatBuildVector(VT, DL, V), Vec); |
3438 | 0 | } |
3439 | 0 | } |
3440 | |
3441 | 0 | return Vec; |
3442 | 0 | } |
3443 | | |
3444 | 0 | return SDValue(); |
3445 | 0 | } |
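A standalone sketch of the dominant-value selection used above: count each value, keep the most common, and prefer later elements on ties (inserting near the front of the vector afterwards is cheaper). Example data is illustrative.

#include <cassert>
#include <map>
#include <utility>
#include <vector>

static std::pair<int, unsigned> pickDominant(const std::vector<int> &Elts) {
  std::map<int, unsigned> Counts;
  int Dominant = 0;
  unsigned Most = 0;
  for (int V : Elts) {
    unsigned Count = ++Counts[V];
    if (Count >= Most) {        // >= implements the "prefer later elements" tie-break
      Dominant = V;
      Most = Count;
    }
  }
  return {Dominant, Most};
}

int main() {
  // {7, 1, 7, 7}: splat 7, then one INSERT_VECTOR_ELT of 1 at index 1.
  auto [Val, Count] = pickDominant({7, 1, 7, 7});
  assert(Val == 7 && Count == 3);
  return 0;
}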
3446 | | |
3447 | | static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, |
3448 | 0 | const RISCVSubtarget &Subtarget) { |
3449 | 0 | MVT VT = Op.getSimpleValueType(); |
3450 | 0 | assert(VT.isFixedLengthVector() && "Unexpected vector!"); |
3451 | | |
3452 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3453 | |
3454 | 0 | SDLoc DL(Op); |
3455 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
3456 | |
3457 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
3458 | 0 | unsigned NumElts = Op.getNumOperands(); |
3459 | |
3460 | 0 | if (VT.getVectorElementType() == MVT::i1) { |
3461 | 0 | if (ISD::isBuildVectorAllZeros(Op.getNode())) { |
3462 | 0 | SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); |
3463 | 0 | return convertFromScalableVector(VT, VMClr, DAG, Subtarget); |
3464 | 0 | } |
3465 | | |
3466 | 0 | if (ISD::isBuildVectorAllOnes(Op.getNode())) { |
3467 | 0 | SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); |
3468 | 0 | return convertFromScalableVector(VT, VMSet, DAG, Subtarget); |
3469 | 0 | } |
3470 | | |
3471 | | // Lower constant mask BUILD_VECTORs via an integer vector type, in |
3472 | | // scalar integer chunks whose bit-width depends on the number of mask |
3473 | | // bits and XLEN. |
3474 | | // First, determine the most appropriate scalar integer type to use. This |
3475 | | // is at most XLenVT, but may be shrunk to a smaller vector element type |
3476 | | // according to the size of the final vector - use i8 chunks rather than |
3477 | | // XLenVT if we're producing a v8i1. This results in more consistent |
3478 | | // codegen across RV32 and RV64. |
3479 | 0 | unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen()); |
3480 | 0 | NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen()); |
3481 | | // If we have to use more than one INSERT_VECTOR_ELT then this |
3482 | | // optimization is likely to increase code size; avoid performing it in |
3483 | | // such a case. We can use a load from a constant pool instead. |
3484 | 0 | if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) |
3485 | 0 | return SDValue(); |
3486 | | // Now we can create our integer vector type. Note that it may be larger |
3487 | | // than the resulting mask type: v4i1 would use v1i8 as its integer type. |
3488 | 0 | unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits); |
3489 | 0 | MVT IntegerViaVecVT = |
3490 | 0 | MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), |
3491 | 0 | IntegerViaVecElts); |
3492 | |
3493 | 0 | uint64_t Bits = 0; |
3494 | 0 | unsigned BitPos = 0, IntegerEltIdx = 0; |
3495 | 0 | SmallVector<SDValue, 8> Elts(IntegerViaVecElts); |
3496 | |
3497 | 0 | for (unsigned I = 0; I < NumElts;) { |
3498 | 0 | SDValue V = Op.getOperand(I); |
3499 | 0 | bool BitValue = !V.isUndef() && V->getAsZExtVal(); |
3500 | 0 | Bits |= ((uint64_t)BitValue << BitPos); |
3501 | 0 | ++BitPos; |
3502 | 0 | ++I; |
3503 | | |
3504 | | // Once we accumulate enough bits to fill our scalar type or process the |
3505 | | // last element, insert into our vector and clear our accumulated data. |
3506 | 0 | if (I % NumViaIntegerBits == 0 || I == NumElts) { |
3507 | 0 | if (NumViaIntegerBits <= 32) |
3508 | 0 | Bits = SignExtend64<32>(Bits); |
3509 | 0 | SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); |
3510 | 0 | Elts[IntegerEltIdx] = Elt; |
3511 | 0 | Bits = 0; |
3512 | 0 | BitPos = 0; |
3513 | 0 | IntegerEltIdx++; |
3514 | 0 | } |
3515 | 0 | } |
3516 | |
3517 | 0 | SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts); |
3518 | |
3519 | 0 | if (NumElts < NumViaIntegerBits) { |
3520 | | // If we're producing a smaller vector than our minimum legal integer |
3521 | | // type, bitcast to the equivalent (known-legal) mask type, and extract |
3522 | | // our final mask. |
3523 | 0 | assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); |
3524 | 0 | Vec = DAG.getBitcast(MVT::v8i1, Vec); |
3525 | 0 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, |
3526 | 0 | DAG.getConstant(0, DL, XLenVT)); |
3527 | 0 | } else { |
3528 | | // Else we must have produced an integer type with the same size as the |
3529 | | // mask type; bitcast for the final result. |
3530 | 0 | assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); |
3531 | 0 | Vec = DAG.getBitcast(VT, Vec); |
3532 | 0 | } |
3533 | | |
3534 | 0 | return Vec; |
3535 | 0 | } |
3536 | | |
3537 | 0 | if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { |
3538 | 0 | unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
3539 | 0 | : RISCVISD::VMV_V_X_VL; |
3540 | 0 | if (!VT.isFloatingPoint()) |
3541 | 0 | Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); |
3542 | 0 | Splat = |
3543 | 0 | DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); |
3544 | 0 | return convertFromScalableVector(VT, Splat, DAG, Subtarget); |
3545 | 0 | } |
3546 | | |
3547 | | // Try and match index sequences, which we can lower to the vid instruction |
3548 | | // with optional modifications. An all-undef vector is matched by |
3549 | | // getSplatValue, above. |
3550 | 0 | if (auto SimpleVID = isSimpleVIDSequence(Op)) { |
3551 | 0 | int64_t StepNumerator = SimpleVID->StepNumerator; |
3552 | 0 | unsigned StepDenominator = SimpleVID->StepDenominator; |
3553 | 0 | int64_t Addend = SimpleVID->Addend; |
3554 | |
|
3555 | 0 | assert(StepNumerator != 0 && "Invalid step"); |
3556 | 0 | bool Negate = false; |
3557 | 0 | int64_t SplatStepVal = StepNumerator; |
3558 | 0 | unsigned StepOpcode = ISD::MUL; |
3559 | | // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it |
3560 | | // anyway as the shift of 63 won't fit in uimm5. |
3561 | 0 | if (StepNumerator != 1 && StepNumerator != INT64_MIN && |
3562 | 0 | isPowerOf2_64(std::abs(StepNumerator))) { |
3563 | 0 | Negate = StepNumerator < 0; |
3564 | 0 | StepOpcode = ISD::SHL; |
3565 | 0 | SplatStepVal = Log2_64(std::abs(StepNumerator)); |
3566 | 0 | } |
3567 | | |
3568 | | // Only emit VIDs with suitably-small steps/addends. We use imm5 as a |
3569 | | // threshold since it's the immediate value many RVV instructions accept. |
3570 | | // There is no vmul.vi instruction so ensure the multiply constant can fit in |
3571 | | // a single addi instruction. |
3572 | 0 | if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) || |
3573 | 0 | (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) && |
3574 | 0 | isPowerOf2_32(StepDenominator) && |
3575 | 0 | (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) { |
3576 | 0 | MVT VIDVT = |
3577 | 0 | VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; |
3578 | 0 | MVT VIDContainerVT = |
3579 | 0 | getContainerForFixedLengthVector(DAG, VIDVT, Subtarget); |
3580 | 0 | SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL); |
3581 | | // Convert right out of the scalable type so we can use standard ISD |
3582 | | // nodes for the rest of the computation. If we used scalable types with |
3583 | | // these, we'd lose the fixed-length vector info and generate worse |
3584 | | // vsetvli code. |
3585 | 0 | VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget); |
3586 | 0 | if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || |
3587 | 0 | (StepOpcode == ISD::SHL && SplatStepVal != 0)) { |
3588 | 0 | SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT); |
3589 | 0 | VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep); |
3590 | 0 | } |
3591 | 0 | if (StepDenominator != 1) { |
3592 | 0 | SDValue SplatStep = |
3593 | 0 | DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT); |
3594 | 0 | VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep); |
3595 | 0 | } |
3596 | 0 | if (Addend != 0 || Negate) { |
3597 | 0 | SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT); |
3598 | 0 | VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend, |
3599 | 0 | VID); |
3600 | 0 | } |
3601 | 0 | if (VT.isFloatingPoint()) { |
3602 | | // TODO: Use vfwcvt to reduce register pressure. |
3603 | 0 | VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID); |
3604 | 0 | } |
3605 | 0 | return VID; |
3606 | 0 | } |
3607 | 0 | } |
3608 | | |
3609 | | // For very small build_vectors, use a single scalar insert of a constant. |
3610 | | // TODO: Base this on constant rematerialization cost, not size. |
3611 | 0 | const unsigned EltBitSize = VT.getScalarSizeInBits(); |
3612 | 0 | if (VT.getSizeInBits() <= 32 && |
3613 | 0 | ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { |
3614 | 0 | MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits()); |
3615 | 0 | assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) && |
3616 | 0 | "Unexpected sequence type"); |
3617 | | // If we can use the original VL with the modified element type, this |
3618 | | // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this |
3619 | | // be moved into InsertVSETVLI? |
3620 | 0 | unsigned ViaVecLen = |
3621 | 0 | (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1; |
3622 | 0 | MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); |
3623 | |
3624 | 0 | uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); |
3625 | 0 | uint64_t SplatValue = 0; |
3626 | | // Construct the amalgamated value at this larger vector type. |
3627 | 0 | for (const auto &OpIdx : enumerate(Op->op_values())) { |
3628 | 0 | const auto &SeqV = OpIdx.value(); |
3629 | 0 | if (!SeqV.isUndef()) |
3630 | 0 | SplatValue |= |
3631 | 0 | ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize)); |
3632 | 0 | } |
3633 | | |
3634 | | // On RV64, sign-extend from 32 to 64 bits where possible in order to |
3635 | | // achieve better constant materialization. |
3636 | 0 | if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) |
3637 | 0 | SplatValue = SignExtend64<32>(SplatValue); |
3638 | |
3639 | 0 | SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT, |
3640 | 0 | DAG.getUNDEF(ViaVecVT), |
3641 | 0 | DAG.getConstant(SplatValue, DL, XLenVT), |
3642 | 0 | DAG.getConstant(0, DL, XLenVT)); |
3643 | 0 | if (ViaVecLen != 1) |
3644 | 0 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, |
3645 | 0 | MVT::getVectorVT(ViaIntVT, 1), Vec, |
3646 | 0 | DAG.getConstant(0, DL, XLenVT)); |
3647 | 0 | return DAG.getBitcast(VT, Vec); |
3648 | 0 | } |
3649 | | |
3650 | | |
3651 | | // Attempt to detect "hidden" splats, which only reveal themselves as splats |
3652 | | // when re-interpreted as a vector with a larger element type. For example, |
3653 | | // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 |
3654 | | // could be instead splat as |
3655 | | // v2i32 = build_vector i32 0x00010000, i32 0x00010000 |
3656 | | // TODO: This optimization could also work on non-constant splats, but it |
3657 | | // would require bit-manipulation instructions to construct the splat value. |
3658 | 0 | SmallVector<SDValue> Sequence; |
3659 | 0 | const auto *BV = cast<BuildVectorSDNode>(Op); |
3660 | 0 | if (VT.isInteger() && EltBitSize < Subtarget.getELen() && |
3661 | 0 | ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && |
3662 | 0 | BV->getRepeatedSequence(Sequence) && |
3663 | 0 | (Sequence.size() * EltBitSize) <= Subtarget.getELen()) { |
3664 | 0 | unsigned SeqLen = Sequence.size(); |
3665 | 0 | MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); |
3666 | 0 | assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || |
3667 | 0 | ViaIntVT == MVT::i64) && |
3668 | 0 | "Unexpected sequence type"); |
3669 | | |
3670 | | // If we can use the original VL with the modified element type, this |
3671 | | // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this |
3672 | | // be moved into InsertVSETVLI? |
3673 | 0 | const unsigned RequiredVL = NumElts / SeqLen; |
3674 | 0 | const unsigned ViaVecLen = |
3675 | 0 | (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ? |
3676 | 0 | NumElts : RequiredVL; |
3677 | 0 | MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen); |
3678 | |
3679 | 0 | unsigned EltIdx = 0; |
3680 | 0 | uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); |
3681 | 0 | uint64_t SplatValue = 0; |
3682 | | // Construct the amalgamated value which can be splatted as this larger |
3683 | | // vector type. |
3684 | 0 | for (const auto &SeqV : Sequence) { |
3685 | 0 | if (!SeqV.isUndef()) |
3686 | 0 | SplatValue |= |
3687 | 0 | ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize)); |
3688 | 0 | EltIdx++; |
3689 | 0 | } |
3690 | | |
3691 | | // On RV64, sign-extend from 32 to 64 bits where possible in order to |
3692 | | // achieve better constant materialization.
3693 | 0 | if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) |
3694 | 0 | SplatValue = SignExtend64<32>(SplatValue); |
3695 | | |
3696 | | // Since we can't introduce illegal i64 types at this stage, we can only |
3697 | | // perform an i64 splat on RV32 if it is its own sign-extended value. That |
3698 | | // way we can use RVV instructions to splat. |
3699 | 0 | assert((ViaIntVT.bitsLE(XLenVT) || |
3700 | 0 | (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && |
3701 | 0 | "Unexpected bitcast sequence"); |
3702 | 0 | if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) { |
3703 | 0 | SDValue ViaVL = |
3704 | 0 | DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT); |
3705 | 0 | MVT ViaContainerVT = |
3706 | 0 | getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget); |
3707 | 0 | SDValue Splat = |
3708 | 0 | DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT, |
3709 | 0 | DAG.getUNDEF(ViaContainerVT), |
3710 | 0 | DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); |
3711 | 0 | Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); |
3712 | 0 | if (ViaVecLen != RequiredVL) |
3713 | 0 | Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, |
3714 | 0 | MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, |
3715 | 0 | DAG.getConstant(0, DL, XLenVT)); |
3716 | 0 | return DAG.getBitcast(VT, Splat); |
3717 | 0 | } |
3718 | 0 | } |
3719 | | |
3720 | | // If the number of signbits allows, see if we can lower as a <N x i8>. |
3721 | | // Our main goal here is to reduce LMUL (and thus work) required to |
3722 | | // build the constant, but we will also narrow if the resulting |
3723 | | // narrow vector is known to materialize cheaply. |
3724 | | // TODO: We really should be costing the smaller vector. There are |
3725 | | // profitable cases this misses. |
3726 | 0 | if (EltBitSize > 8 && VT.isInteger() && |
3727 | 0 | (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) { |
3728 | 0 | unsigned SignBits = DAG.ComputeNumSignBits(Op); |
3729 | 0 | if (EltBitSize - SignBits < 8) { |
3730 | 0 | SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8), |
3731 | 0 | DL, Op->ops()); |
3732 | 0 | Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8), |
3733 | 0 | Source, DAG, Subtarget); |
3734 | 0 | SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL); |
3735 | 0 | return convertFromScalableVector(VT, Res, DAG, Subtarget); |
3736 | 0 | } |
3737 | 0 | } |
3738 | | |
3739 | 0 | if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) |
3740 | 0 | return Res; |
3741 | | |
3742 | | // For constant vectors, use generic constant pool lowering. Otherwise, |
3743 | | // we'd have to materialize constants in GPRs just to move them into the |
3744 | | // vector. |
3745 | 0 | return SDValue(); |
3746 | 0 | } |
3747 | | |
3748 | | static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, |
3749 | 0 | const RISCVSubtarget &Subtarget) { |
3750 | 0 | MVT VT = Op.getSimpleValueType(); |
3751 | 0 | assert(VT.isFixedLengthVector() && "Unexpected vector!"); |
3752 | | |
3753 | 0 | if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || |
3754 | 0 | ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) |
3755 | 0 | return lowerBuildVectorOfConstants(Op, DAG, Subtarget); |
3756 | | |
3757 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3758 | |
3759 | 0 | SDLoc DL(Op); |
3760 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
3761 | |
3762 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
3763 | |
3764 | 0 | if (VT.getVectorElementType() == MVT::i1) { |
3765 | | // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask |
3766 | | // vector type, we have a legal equivalently-sized i8 type, so we can use |
3767 | | // that. |
3768 | 0 | MVT WideVecVT = VT.changeVectorElementType(MVT::i8); |
3769 | 0 | SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); |
3770 | |
3771 | 0 | SDValue WideVec; |
3772 | 0 | if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { |
3773 | | // For a splat, perform a scalar truncate before creating the wider |
3774 | | // vector. |
3775 | 0 | Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat, |
3776 | 0 | DAG.getConstant(1, DL, Splat.getValueType())); |
3777 | 0 | WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); |
3778 | 0 | } else { |
3779 | 0 | SmallVector<SDValue, 8> Ops(Op->op_values()); |
3780 | 0 | WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); |
3781 | 0 | SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); |
3782 | 0 | WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); |
3783 | 0 | } |
3784 | |
3785 | 0 | return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); |
3786 | 0 | } |
3787 | | |
3788 | 0 | if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { |
3789 | 0 | if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget)) |
3790 | 0 | return Gather; |
3791 | 0 | unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL |
3792 | 0 | : RISCVISD::VMV_V_X_VL; |
3793 | 0 | if (!VT.isFloatingPoint()) |
3794 | 0 | Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat); |
3795 | 0 | Splat = |
3796 | 0 | DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL); |
3797 | 0 | return convertFromScalableVector(VT, Splat, DAG, Subtarget); |
3798 | 0 | } |
3799 | | |
3800 | 0 | if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) |
3801 | 0 | return Res; |
3802 | | |
3803 | | // If we're compiling for an exact VLEN value, we can split our work per |
3804 | | // register in the register group. |
3805 | 0 | const unsigned MinVLen = Subtarget.getRealMinVLen(); |
3806 | 0 | const unsigned MaxVLen = Subtarget.getRealMaxVLen(); |
3807 | 0 | if (MinVLen == MaxVLen && VT.getSizeInBits().getKnownMinValue() > MinVLen) { |
3808 | 0 | MVT ElemVT = VT.getVectorElementType(); |
3809 | 0 | unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); |
3810 | 0 | EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
3811 | 0 | MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); |
3812 | 0 | MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); |
3813 | 0 | assert(M1VT == getLMUL1VT(M1VT)); |
3814 | | |
3815 | | // The following semantically builds up a fixed length concat_vector |
3816 | | // of the component build_vectors. We eagerly lower to scalable and |
3817 | | // insert_subvector here to avoid DAG combining it back to a large |
3818 | | // build_vector. |
3819 | 0 | SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end()); |
3820 | 0 | unsigned NumOpElts = M1VT.getVectorMinNumElements(); |
3821 | 0 | SDValue Vec = DAG.getUNDEF(ContainerVT); |
3822 | 0 | for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) { |
3823 | 0 | auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg); |
3824 | 0 | SDValue SubBV = |
3825 | 0 | DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps); |
3826 | 0 | SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget); |
3827 | 0 | unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts; |
3828 | 0 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV, |
3829 | 0 | DAG.getVectorIdxConstant(InsertIdx, DL)); |
3830 | 0 | } |
3831 | 0 | return convertFromScalableVector(VT, Vec, DAG, Subtarget); |
3832 | 0 | } |
3833 | | |
3834 | | // Cap the cost at a value linear to the number of elements in the vector. |
3835 | | // The default lowering is to use the stack. The vector store + scalar loads |
3836 | | // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
3837 | | // being (at least) linear in LMUL. As a result, using the vslidedown
3838 | | // lowering for every element ends up being VL*LMUL.
3839 | | // TODO: Should we be directly costing the stack alternative? Doing so might |
3840 | | // give us a more accurate upper bound. |
3841 | 0 | InstructionCost LinearBudget = VT.getVectorNumElements() * 2; |
3842 | | |
3843 | | // TODO: unify with TTI getSlideCost. |
3844 | 0 | InstructionCost PerSlideCost = 1; |
3845 | 0 | switch (RISCVTargetLowering::getLMUL(ContainerVT)) { |
3846 | 0 | default: break; |
3847 | 0 | case RISCVII::VLMUL::LMUL_2: |
3848 | 0 | PerSlideCost = 2; |
3849 | 0 | break; |
3850 | 0 | case RISCVII::VLMUL::LMUL_4: |
3851 | 0 | PerSlideCost = 4; |
3852 | 0 | break; |
3853 | 0 | case RISCVII::VLMUL::LMUL_8: |
3854 | 0 | PerSlideCost = 8; |
3855 | 0 | break; |
3856 | 0 | } |
3857 | | |
3858 | | // TODO: Should we be using the build instseq then cost + evaluate scheme |
3859 | | // we use for integer constants here? |
3860 | 0 | unsigned UndefCount = 0; |
3861 | 0 | for (const SDValue &V : Op->ops()) { |
3862 | 0 | if (V.isUndef()) { |
3863 | 0 | UndefCount++; |
3864 | 0 | continue; |
3865 | 0 | } |
3866 | 0 | if (UndefCount) { |
3867 | 0 | LinearBudget -= PerSlideCost; |
3868 | 0 | UndefCount = 0; |
3869 | 0 | } |
3870 | 0 | LinearBudget -= PerSlideCost; |
3871 | 0 | } |
3872 | 0 | if (UndefCount) { |
3873 | 0 | LinearBudget -= PerSlideCost; |
3874 | 0 | } |
3875 | |
3876 | 0 | if (LinearBudget < 0) |
3877 | 0 | return SDValue(); |
3878 | | |
3879 | 0 | assert((!VT.isFloatingPoint() || |
3880 | 0 | VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) && |
3881 | 0 | "Illegal type which will result in reserved encoding"); |
3882 | | |
3883 | 0 | const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; |
3884 | |
3885 | 0 | SDValue Vec; |
3886 | 0 | UndefCount = 0; |
3887 | 0 | for (SDValue V : Op->ops()) { |
3888 | 0 | if (V.isUndef()) { |
3889 | 0 | UndefCount++; |
3890 | 0 | continue; |
3891 | 0 | } |
3892 | | |
3893 | | // Start our sequence with a TA splat in the hopes that hardware is able to |
3894 | | // recognize there's no dependency on the prior value of our temporary |
3895 | | // register. |
3896 | 0 | if (!Vec) { |
3897 | 0 | Vec = DAG.getSplatVector(VT, DL, V); |
3898 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
3899 | 0 | UndefCount = 0; |
3900 | 0 | continue; |
3901 | 0 | } |
3902 | | |
3903 | 0 | if (UndefCount) { |
3904 | 0 | const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); |
3905 | 0 | Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), |
3906 | 0 | Vec, Offset, Mask, VL, Policy); |
3907 | 0 | UndefCount = 0; |
3908 | 0 | } |
3909 | 0 | auto OpCode = |
3910 | 0 | VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; |
3911 | 0 | if (!VT.isFloatingPoint()) |
3912 | 0 | V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); |
3913 | 0 | Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, |
3914 | 0 | V, Mask, VL); |
3915 | 0 | } |
3916 | 0 | if (UndefCount) { |
3917 | 0 | const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); |
3918 | 0 | Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), |
3919 | 0 | Vec, Offset, Mask, VL, Policy); |
3920 | 0 | } |
3921 | 0 | return convertFromScalableVector(VT, Vec, DAG, Subtarget); |
3922 | 0 | } |
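
A rough standalone restatement of the slide-budget check above, under the
assumption that one slide costs roughly LMUL units of work and that each
defined element, plus each run of undefs, needs one slide; the helper and its
inputs are illustrative, not the real cost model:

#include <cstdio>

// Returns true if the vslide1down-per-element lowering stays within the
// linear budget of 2 * NumElts, mirroring the accounting above.
static bool fitsLinearBudget(const int *Elts, unsigned NumElts,
                             unsigned LMUL /* 1, 2, 4 or 8 */) {
  int Budget = (int)NumElts * 2;
  const int PerSlideCost = (int)LMUL;
  unsigned UndefRun = 0;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Elts[i] < 0) { // treat negative entries as "undef"
      ++UndefRun;
      continue;
    }
    if (UndefRun) {
      Budget -= PerSlideCost; // one slidedown covers the whole undef run
      UndefRun = 0;
    }
    Budget -= PerSlideCost;   // one vslide1down per defined element
  }
  if (UndefRun)
    Budget -= PerSlideCost;   // trailing undef run
  return Budget >= 0;
}

int main() {
  const int Elts[8] = {1, -1, -1, 2, 3, -1, 4, 5};
  std::printf("LMUL1: %d  LMUL8: %d\n", fitsLinearBudget(Elts, 8, 1),
              fitsLinearBudget(Elts, 8, 8)); // 1 0
  return 0;
}
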
3923 | | |
3924 | | static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, |
3925 | | SDValue Lo, SDValue Hi, SDValue VL, |
3926 | 0 | SelectionDAG &DAG) { |
3927 | 0 | if (!Passthru) |
3928 | 0 | Passthru = DAG.getUNDEF(VT); |
3929 | 0 | if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { |
3930 | 0 | int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); |
3931 | 0 | int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); |
3932 | | // If the Hi constant equals Lo's sign bit replicated (i.e. Lo sign-extended),
3933 | | // lower this as a custom node in order to try and match RVV vector/scalar
3934 | | // instructions.
3934 | 0 | if ((LoC >> 31) == HiC) |
3935 | 0 | return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); |
3936 | | |
3937 | | // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo, |
3938 | | // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use |
3939 | | // vlmax vsetvli or vsetivli to change the VL. |
3940 | | // FIXME: Support larger constants? |
3941 | | // FIXME: Support non-constant VLs by saturating? |
3942 | 0 | if (LoC == HiC) { |
3943 | 0 | SDValue NewVL; |
3944 | 0 | if (isAllOnesConstant(VL) || |
3945 | 0 | (isa<RegisterSDNode>(VL) && |
3946 | 0 | cast<RegisterSDNode>(VL)->getReg() == RISCV::X0)) |
3947 | 0 | NewVL = DAG.getRegister(RISCV::X0, MVT::i32); |
3948 | 0 | else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal())) |
3949 | 0 | NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL); |
3950 | |
3951 | 0 | if (NewVL) { |
3952 | 0 | MVT InterVT = |
3953 | 0 | MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); |
3954 | 0 | auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, |
3955 | 0 | DAG.getUNDEF(InterVT), Lo, |
3956 | 0 | DAG.getRegister(RISCV::X0, MVT::i32)); |
3957 | 0 | return DAG.getNode(ISD::BITCAST, DL, VT, InterVec); |
3958 | 0 | } |
3959 | 0 | } |
3960 | 0 | } |
3961 | | |
3962 | | // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. |
3963 | 0 | if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && |
3964 | 0 | isa<ConstantSDNode>(Hi.getOperand(1)) && |
3965 | 0 | Hi.getConstantOperandVal(1) == 31) |
3966 | 0 | return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); |
3967 | | |
3968 | | // If the hi bits of the splat are undefined, then it's fine to just splat Lo |
3969 | | // even if it might be sign extended. |
3970 | 0 | if (Hi.isUndef()) |
3971 | 0 | return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL); |
3972 | | |
3973 | | // Fall back to a stack store and stride x0 vector load. |
3974 | 0 | return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo, |
3975 | 0 | Hi, VL); |
3976 | 0 | } |
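
The first special case in splatPartsI64WithVL boils down to the test
(LoC >> 31) == HiC: the 64-bit constant is just its low half sign-extended, so
RV32 can splat it from Lo alone. A small sketch of that test on plain integers
(the helper name is illustrative):

#include <cstdint>
#include <cstdio>

// True if the i64 value is just the sign extension of its low 32 bits, i.e.
// it can be splatted on RV32 from the low half alone.
static bool splatFitsInLowHalf(int64_t V) {
  int32_t Lo = (int32_t)(uint32_t)V;
  int32_t Hi = (int32_t)(V >> 32);
  return (Lo >> 31) == Hi;
}

int main() {
  std::printf("%d %d %d\n",
              (int)splatFitsInLowHalf(42),             // 1: Hi == 0, Lo >= 0
              (int)splatFitsInLowHalf(-1),             // 1: Hi == -1, Lo < 0
              (int)splatFitsInLowHalf(0x100000000LL)); // 0: needs both halves
  return 0;
}
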
3977 | | |
3978 | | // Called by type legalization to handle splat of i64 on RV32. |
3979 | | // FIXME: We can optimize this when the type has sign or zero bits in one |
3980 | | // of the halves. |
3981 | | static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, |
3982 | | SDValue Scalar, SDValue VL, |
3983 | 0 | SelectionDAG &DAG) { |
3984 | 0 | assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); |
3985 | 0 | SDValue Lo, Hi; |
3986 | 0 | std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32); |
3987 | 0 | return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG); |
3988 | 0 | } |
3989 | | |
3990 | | // This function lowers a splat of a scalar operand Splat with the vector |
3991 | | // length VL. It ensures the final sequence is type legal, which is useful when |
3992 | | // lowering a splat after type legalization. |
3993 | | static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, |
3994 | | MVT VT, const SDLoc &DL, SelectionDAG &DAG, |
3995 | 0 | const RISCVSubtarget &Subtarget) { |
3996 | 0 | bool HasPassthru = Passthru && !Passthru.isUndef(); |
3997 | 0 | if (!HasPassthru && !Passthru) |
3998 | 0 | Passthru = DAG.getUNDEF(VT); |
3999 | 0 | if (VT.isFloatingPoint()) |
4000 | 0 | return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); |
4001 | | |
4002 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
4003 | | |
4004 | | // Simplest case is that the operand needs to be promoted to XLenVT. |
4005 | 0 | if (Scalar.getValueType().bitsLE(XLenVT)) { |
4006 | | // If the operand is a constant, sign extend to increase our chances |
4007 | | // of being able to use a .vi instruction. ANY_EXTEND would become
4008 | | // a zero extend and the simm5 check in isel would fail. |
4009 | | // FIXME: Should we ignore the upper bits in isel instead? |
4010 | 0 | unsigned ExtOpc = |
4011 | 0 | isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
4012 | 0 | Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); |
4013 | 0 | return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); |
4014 | 0 | } |
4015 | | |
4016 | 0 | assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && |
4017 | 0 | "Unexpected scalar for splat lowering!"); |
4018 | | |
4019 | 0 | if (isOneConstant(VL) && isNullConstant(Scalar)) |
4020 | 0 | return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, |
4021 | 0 | DAG.getConstant(0, DL, XLenVT), VL); |
4022 | | |
4023 | | // Otherwise use the more complicated splatting algorithm. |
4024 | 0 | return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); |
4025 | 0 | } |
4026 | | |
4027 | | // This function lowers an insert of a scalar operand Scalar into lane |
4028 | | // 0 of the vector regardless of the value of VL. The contents of the |
4029 | | // remaining lanes of the result vector are unspecified. VL is assumed |
4030 | | // to be non-zero. |
4031 | | static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, |
4032 | | const SDLoc &DL, SelectionDAG &DAG, |
4033 | 0 | const RISCVSubtarget &Subtarget) { |
4034 | 0 | assert(VT.isScalableVector() && "Expect VT is scalable vector type."); |
4035 | | |
4036 | 0 | const MVT XLenVT = Subtarget.getXLenVT(); |
4037 | 0 | SDValue Passthru = DAG.getUNDEF(VT); |
4038 | |
4039 | 0 | if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
4040 | 0 | isNullConstant(Scalar.getOperand(1))) { |
4041 | 0 | SDValue ExtractedVal = Scalar.getOperand(0); |
4042 | 0 | MVT ExtractedVT = ExtractedVal.getSimpleValueType(); |
4043 | 0 | MVT ExtractedContainerVT = ExtractedVT; |
4044 | 0 | if (ExtractedContainerVT.isFixedLengthVector()) { |
4045 | 0 | ExtractedContainerVT = getContainerForFixedLengthVector( |
4046 | 0 | DAG, ExtractedContainerVT, Subtarget); |
4047 | 0 | ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal, |
4048 | 0 | DAG, Subtarget); |
4049 | 0 | } |
4050 | 0 | if (ExtractedContainerVT.bitsLE(VT)) |
4051 | 0 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal, |
4052 | 0 | DAG.getConstant(0, DL, XLenVT)); |
4053 | 0 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, |
4054 | 0 | DAG.getConstant(0, DL, XLenVT)); |
4055 | 0 | } |
4056 | | |
4057 | | |
4058 | 0 | if (VT.isFloatingPoint()) |
4059 | 0 | return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, |
4060 | 0 | DAG.getUNDEF(VT), Scalar, VL); |
4061 | | |
4062 | | // Avoid the tricky legalization cases by falling back to using the |
4063 | | // splat code which already handles it gracefully. |
4064 | 0 | if (!Scalar.getValueType().bitsLE(XLenVT)) |
4065 | 0 | return lowerScalarSplat(DAG.getUNDEF(VT), Scalar, |
4066 | 0 | DAG.getConstant(1, DL, XLenVT), |
4067 | 0 | VT, DL, DAG, Subtarget); |
4068 | | |
4069 | | // If the operand is a constant, sign extend to increase our chances |
4070 | | // of being able to use a .vi instruction. ANY_EXTEND would become
4071 | | // a zero extend and the simm5 check in isel would fail. |
4072 | | // FIXME: Should we ignore the upper bits in isel instead? |
4073 | 0 | unsigned ExtOpc = |
4074 | 0 | isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
4075 | 0 | Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); |
4076 | 0 | return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, |
4077 | 0 | DAG.getUNDEF(VT), Scalar, VL); |
4078 | 0 | } |
4079 | | |
4080 | | // Does this shuffle extract either the even or odd elements of a vector?
4081 | | // That is, specifically, either (a) or (b) below. |
4082 | | // t34: v8i8 = extract_subvector t11, Constant:i64<0> |
4083 | | // t33: v8i8 = extract_subvector t11, Constant:i64<8> |
4084 | | // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33 |
4085 | | // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33 |
4086 | | // Returns {Src Vector, Even Elements} on success
4087 | | static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, |
4088 | | SDValue V2, ArrayRef<int> Mask, |
4089 | 0 | const RISCVSubtarget &Subtarget) { |
4090 | | // Need to be able to widen the vector. |
4091 | 0 | if (VT.getScalarSizeInBits() >= Subtarget.getELen()) |
4092 | 0 | return false; |
4093 | | |
4094 | | // Both input must be extracts. |
4095 | 0 | if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
4096 | 0 | V2.getOpcode() != ISD::EXTRACT_SUBVECTOR) |
4097 | 0 | return false; |
4098 | | |
4099 | | // Extracting from the same source. |
4100 | 0 | SDValue Src = V1.getOperand(0); |
4101 | 0 | if (Src != V2.getOperand(0)) |
4102 | 0 | return false; |
4103 | | |
4104 | | // Src needs to have twice the number of elements. |
4105 | 0 | if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2)) |
4106 | 0 | return false; |
4107 | | |
4108 | | // The extracts must extract the two halves of the source. |
4109 | 0 | if (V1.getConstantOperandVal(1) != 0 || |
4110 | 0 | V2.getConstantOperandVal(1) != Mask.size()) |
4111 | 0 | return false; |
4112 | | |
4113 | | // First index must be the first even or odd element from V1. |
4114 | 0 | if (Mask[0] != 0 && Mask[0] != 1) |
4115 | 0 | return false; |
4116 | | |
4117 | | // The others must increase by 2 each time. |
4118 | | // TODO: Support undef elements? |
4119 | 0 | for (unsigned i = 1; i != Mask.size(); ++i) |
4120 | 0 | if (Mask[i] != Mask[i - 1] + 2) |
4121 | 0 | return false; |
4122 | | |
4123 | 0 | return true; |
4124 | 0 | } |
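
Stripped of the SelectionDAG plumbing, the mask test above reduces to: the
first index must select element 0 or 1, and each later index must be the
previous one plus 2. A standalone sketch (illustrative helper name; undef
entries are not handled, matching the TODO above):

#include <cstdio>

// Does Mask select the even (Mask[0] == 0) or odd (Mask[0] == 1) elements of a
// source that has twice as many elements?
static bool isEvenOddDeinterleaveMask(const int *Mask, unsigned Size) {
  if (Size == 0 || (Mask[0] != 0 && Mask[0] != 1))
    return false;
  for (unsigned i = 1; i != Size; ++i)
    if (Mask[i] != Mask[i - 1] + 2)
      return false;
  return true;
}

int main() {
  const int Even[]  = {0, 2, 4, 6, 8, 10, 12, 14};
  const int Odd[]   = {1, 3, 5, 7, 9, 11, 13, 15};
  const int Other[] = {0, 1, 2, 3, 4, 5, 6, 7};
  std::printf("%d %d %d\n", isEvenOddDeinterleaveMask(Even, 8),
              isEvenOddDeinterleaveMask(Odd, 8),
              isEvenOddDeinterleaveMask(Other, 8)); // 1 1 0
  return 0;
}
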
4125 | | |
4126 | | /// Is this shuffle interleaving contiguous elements from one vector into the |
4127 | | /// even elements and contiguous elements from another vector into the odd |
4128 | | /// elements. \p EvenSrc will contain the element that should be in the first |
4129 | | /// even element. \p OddSrc will contain the element that should be in the first |
4130 | | /// odd element. These can be the first element in a source or the element half |
4131 | | /// way through the source. |
4132 | | static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc, |
4133 | 0 | int &OddSrc, const RISCVSubtarget &Subtarget) { |
4134 | | // We need to be able to widen elements to the next larger integer type. |
4135 | 0 | if (VT.getScalarSizeInBits() >= Subtarget.getELen()) |
4136 | 0 | return false; |
4137 | | |
4138 | 0 | int Size = Mask.size(); |
4139 | 0 | int NumElts = VT.getVectorNumElements(); |
4140 | 0 | assert(Size == (int)NumElts && "Unexpected mask size"); |
4141 | | |
4142 | 0 | SmallVector<unsigned, 2> StartIndexes; |
4143 | 0 | if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes)) |
4144 | 0 | return false; |
4145 | | |
4146 | 0 | EvenSrc = StartIndexes[0]; |
4147 | 0 | OddSrc = StartIndexes[1]; |
4148 | | |
4149 | | // One source should be low half of first vector. |
4150 | 0 | if (EvenSrc != 0 && OddSrc != 0) |
4151 | 0 | return false; |
4152 | | |
4153 | | // Subvectors will be extracted from either the start of the two input
4154 | | // vectors, or the start and middle of the first vector if it's a unary
4155 | | // interleave.
4156 | | // In both cases, HalfNumElts will be extracted. |
4157 | | // We need to ensure that the extract indices are 0 or HalfNumElts otherwise |
4158 | | // we'll create an illegal extract_subvector. |
4159 | | // FIXME: We could support other values using a slidedown first. |
4160 | 0 | int HalfNumElts = NumElts / 2; |
4161 | 0 | return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0); |
4162 | 0 | } |
4163 | | |
4164 | | /// Match shuffles that concatenate two vectors, rotate the concatenation, |
4165 | | /// and then extract the original number of elements from the rotated result. |
4166 | | /// This is equivalent to vector.splice or X86's PALIGNR instruction. The |
4167 | | /// returned rotation amount is for a rotate right, where elements move from |
4168 | | /// higher elements to lower elements. \p LoSrc indicates the first source |
4169 | | /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector |
4170 | | /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be |
4171 | | /// 0 or 1 if a rotation is found. |
4172 | | /// |
4173 | | /// NOTE: We talk about rotate to the right which matches how bit shift and |
4174 | | /// rotate instructions are described where LSBs are on the right, but LLVM IR |
4175 | | /// and the table below write vectors with the lowest elements on the left. |
4176 | 0 | static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) { |
4177 | 0 | int Size = Mask.size(); |
4178 | | |
4179 | | // We need to detect various ways of spelling a rotation: |
4180 | | // [11, 12, 13, 14, 15, 0, 1, 2] |
4181 | | // [-1, 12, 13, 14, -1, -1, 1, -1] |
4182 | | // [-1, -1, -1, -1, -1, -1, 1, 2] |
4183 | | // [ 3, 4, 5, 6, 7, 8, 9, 10] |
4184 | | // [-1, 4, 5, 6, -1, -1, 9, -1] |
4185 | | // [-1, 4, 5, 6, -1, -1, -1, -1] |
4186 | 0 | int Rotation = 0; |
4187 | 0 | LoSrc = -1; |
4188 | 0 | HiSrc = -1; |
4189 | 0 | for (int i = 0; i != Size; ++i) { |
4190 | 0 | int M = Mask[i]; |
4191 | 0 | if (M < 0) |
4192 | 0 | continue; |
4193 | | |
4194 | | // Determine where a rotate vector would have started. |
4195 | 0 | int StartIdx = i - (M % Size); |
4196 | | // The identity rotation isn't interesting, stop. |
4197 | 0 | if (StartIdx == 0) |
4198 | 0 | return -1; |
4199 | | |
4200 | | // If we found the tail of a vector the rotation must be the missing |
4201 | | // front. If we found the head of a vector, it must be how much of the |
4202 | | // head. |
4203 | 0 | int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx; |
4204 | |
4205 | 0 | if (Rotation == 0) |
4206 | 0 | Rotation = CandidateRotation; |
4207 | 0 | else if (Rotation != CandidateRotation) |
4208 | | // The rotations don't match, so we can't match this mask. |
4209 | 0 | return -1; |
4210 | | |
4211 | | // Compute which value this mask is pointing at. |
4212 | 0 | int MaskSrc = M < Size ? 0 : 1; |
4213 | | |
4214 | | // Compute which of the two target values this index should be assigned to. |
4215 | | // This reflects whether the high elements are remaining or the low elemnts |
4216 | | // are remaining. |
4217 | 0 | int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc; |
4218 | | |
4219 | | // Either set up this value if we've not encountered it before, or check |
4220 | | // that it remains consistent. |
4221 | 0 | if (TargetSrc < 0) |
4222 | 0 | TargetSrc = MaskSrc; |
4223 | 0 | else if (TargetSrc != MaskSrc) |
4224 | | // This may be a rotation, but it pulls from the inputs in some |
4225 | | // unsupported interleaving. |
4226 | 0 | return -1; |
4227 | 0 | } |
4228 | | |
4229 | | // Check that we successfully analyzed the mask, and normalize the results. |
4230 | 0 | assert(Rotation != 0 && "Failed to locate a viable rotation!"); |
4231 | 0 | assert((LoSrc >= 0 || HiSrc >= 0) && |
4232 | 0 | "Failed to find a rotated input vector!"); |
4233 | | |
4234 | 0 | return Rotation; |
4235 | 0 | } |
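
A standalone restatement of the rotation match above on a plain integer mask,
handy for checking the worked examples in the comment; the helper name is
illustrative and the semantics are only meant to mirror isElementRotate:

#include <cstdio>

// Returns the rotate-right amount, or -1 if the mask is not a rotation.
// LoSrc/HiSrc report which input (0 or 1) supplies the low/high part, or -1.
static int matchElementRotate(const int *Mask, int Size, int &LoSrc,
                              int &HiSrc) {
  int Rotation = 0;
  LoSrc = HiSrc = -1;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue; // undef lane
    int StartIdx = i - (M % Size);
    if (StartIdx == 0)
      return -1; // the identity rotation is not interesting
    int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = Candidate;
    else if (Rotation != Candidate)
      return -1; // inconsistent rotation amounts
    int MaskSrc = M < Size ? 0 : 1;
    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
    if (TargetSrc < 0)
      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)
      return -1; // pulls from the inputs in an unsupported interleaving
  }
  return Rotation;
}

int main() {
  // First example mask from the comment above.
  const int Mask[8] = {11, 12, 13, 14, 15, 0, 1, 2};
  int Lo, Hi;
  int Rot = matchElementRotate(Mask, 8, Lo, Hi);
  std::printf("rotation=%d LoSrc=%d HiSrc=%d\n", Rot, Lo, Hi); // 3 0 1
  return 0;
}
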
4236 | | |
4237 | | // Lower a deinterleave shuffle to vnsrl. |
4238 | | // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true) |
4239 | | // -> [p, q, r, s] (EvenElts == false) |
4240 | | // VT is the type of the vector to return, <[vscale x ]n x ty> |
4241 | | // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty> |
4242 | | static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, |
4243 | | bool EvenElts, |
4244 | | const RISCVSubtarget &Subtarget, |
4245 | 0 | SelectionDAG &DAG) { |
4246 | | // The result is a vector of type <m x n x ty> |
4247 | 0 | MVT ContainerVT = VT; |
4248 | | // Convert fixed vectors to scalable if needed |
4249 | 0 | if (ContainerVT.isFixedLengthVector()) { |
4250 | 0 | assert(Src.getSimpleValueType().isFixedLengthVector()); |
4251 | 0 | ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); |
4252 | | |
4253 | | // The source is a vector of type <m x n*2 x ty> |
4254 | 0 | MVT SrcContainerVT = |
4255 | 0 | MVT::getVectorVT(ContainerVT.getVectorElementType(), |
4256 | 0 | ContainerVT.getVectorElementCount() * 2); |
4257 | 0 | Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); |
4258 | 0 | } |
4259 | | |
4260 | 0 | auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
4261 | | |
4262 | | // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2> |
4263 | | // This also converts FP to int. |
4264 | 0 | unsigned EltBits = ContainerVT.getScalarSizeInBits(); |
4265 | 0 | MVT WideSrcContainerVT = MVT::getVectorVT( |
4266 | 0 | MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount()); |
4267 | 0 | Src = DAG.getBitcast(WideSrcContainerVT, Src); |
4268 | | |
4269 | | // The integer version of the container type. |
4270 | 0 | MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger(); |
4271 | | |
4272 | | // If we want even elements, then the shift amount is 0. Otherwise, shift by |
4273 | | // the original element size. |
4274 | 0 | unsigned Shift = EvenElts ? 0 : EltBits; |
4275 | 0 | SDValue SplatShift = DAG.getNode( |
4276 | 0 | RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT), |
4277 | 0 | DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL); |
4278 | 0 | SDValue Res = |
4279 | 0 | DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift, |
4280 | 0 | DAG.getUNDEF(IntContainerVT), TrueMask, VL); |
4281 | | // Cast back to FP if needed. |
4282 | 0 | Res = DAG.getBitcast(ContainerVT, Res); |
4283 | |
4284 | 0 | if (VT.isFixedLengthVector()) |
4285 | 0 | Res = convertFromScalableVector(VT, Res, DAG, Subtarget); |
4286 | 0 | return Res; |
4287 | 0 | } |
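
The vnsrl trick treats each adjacent pair of narrow elements as a single
element of twice the width: shifting that wide element right by 0 keeps the
even (low) halves, shifting by the element width keeps the odd (high) halves.
A scalar sketch for i8 elements (illustrative helper name; little-endian
pairing as on RISC-V):

#include <cstdint>
#include <cstdio>

// Scalar analogue of the vnsrl deinterleave above: view each pair of i8
// elements as one i16, shift right by 0 (even) or 8 (odd), truncate to i8.
static void deinterleave8(const uint8_t *Src, unsigned NumPairs, bool Even,
                          uint8_t *Dst) {
  unsigned Shift = Even ? 0 : 8;
  for (unsigned i = 0; i != NumPairs; ++i) {
    // Little-endian pairing: element 2*i is the low byte of the i16.
    uint16_t Wide = (uint16_t)(Src[2 * i] | (Src[2 * i + 1] << 8));
    Dst[i] = (uint8_t)(Wide >> Shift);
  }
}

int main() {
  const uint8_t Src[8] = {'a', 'p', 'b', 'q', 'c', 'r', 'd', 's'};
  uint8_t Evens[4], Odds[4];
  deinterleave8(Src, 4, /*Even=*/true, Evens);
  deinterleave8(Src, 4, /*Even=*/false, Odds);
  std::printf("%.4s %.4s\n", (const char *)Evens, (const char *)Odds); // abcd pqrs
  return 0;
}
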
4288 | | |
4289 | | // Lower the following shuffle to vslidedown. |
4290 | | // a) |
4291 | | // t49: v8i8 = extract_subvector t13, Constant:i64<0> |
4292 | | // t109: v8i8 = extract_subvector t13, Constant:i64<8> |
4293 | | // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 |
4294 | | // b) |
4295 | | // t69: v16i16 = extract_subvector t68, Constant:i64<0> |
4296 | | // t23: v8i16 = extract_subvector t69, Constant:i64<0> |
4297 | | // t29: v4i16 = extract_subvector t23, Constant:i64<4> |
4298 | | // t26: v8i16 = extract_subvector t69, Constant:i64<8> |
4299 | | // t30: v4i16 = extract_subvector t26, Constant:i64<0> |
4300 | | // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 |
4301 | | static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, |
4302 | | SDValue V1, SDValue V2, |
4303 | | ArrayRef<int> Mask, |
4304 | | const RISCVSubtarget &Subtarget, |
4305 | 0 | SelectionDAG &DAG) { |
4306 | 0 | auto findNonEXTRACT_SUBVECTORParent = |
4307 | 0 | [](SDValue Parent) -> std::pair<SDValue, uint64_t> { |
4308 | 0 | uint64_t Offset = 0; |
4309 | 0 | while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
4310 | | // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from |
4311 | | // a scalable vector. But we don't want to match the case. |
4312 | 0 | Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) { |
4313 | 0 | Offset += Parent.getConstantOperandVal(1); |
4314 | 0 | Parent = Parent.getOperand(0); |
4315 | 0 | } |
4316 | 0 | return std::make_pair(Parent, Offset); |
4317 | 0 | }; |
4318 | |
4319 | 0 | auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1); |
4320 | 0 | auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2); |
4321 | | |
4322 | | // Extracting from the same source. |
4323 | 0 | SDValue Src = V1Src; |
4324 | 0 | if (Src != V2Src) |
4325 | 0 | return SDValue(); |
4326 | | |
4327 | | // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs. |
4328 | 0 | SmallVector<int, 16> NewMask(Mask); |
4329 | 0 | for (size_t i = 0; i != NewMask.size(); ++i) { |
4330 | 0 | if (NewMask[i] == -1) |
4331 | 0 | continue; |
4332 | | |
4333 | 0 | if (static_cast<size_t>(NewMask[i]) < NewMask.size()) { |
4334 | 0 | NewMask[i] = NewMask[i] + V1IndexOffset; |
4335 | 0 | } else { |
4336 | | // Minus NewMask.size() is needed. Otherwise, the b case would be |
4337 | | // <5,6,7,12> instead of <5,6,7,8>. |
4338 | 0 | NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset; |
4339 | 0 | } |
4340 | 0 | } |
4341 | | |
4342 | | // First index must be known and non-zero. It will be used as the slidedown |
4343 | | // amount. |
4344 | 0 | if (NewMask[0] <= 0) |
4345 | 0 | return SDValue(); |
4346 | | |
4347 | | // NewMask is also continuous. |
4348 | 0 | for (unsigned i = 1; i != NewMask.size(); ++i) |
4349 | 0 | if (NewMask[i - 1] + 1 != NewMask[i]) |
4350 | 0 | return SDValue(); |
4351 | | |
4352 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
4353 | 0 | MVT SrcVT = Src.getSimpleValueType(); |
4354 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); |
4355 | 0 | auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); |
4356 | 0 | SDValue Slidedown = |
4357 | 0 | getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), |
4358 | 0 | convertToScalableVector(ContainerVT, Src, DAG, Subtarget), |
4359 | 0 | DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL); |
4360 | 0 | return DAG.getNode( |
4361 | 0 | ISD::EXTRACT_SUBVECTOR, DL, VT, |
4362 | 0 | convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), |
4363 | 0 | DAG.getConstant(0, DL, XLenVT)); |
4364 | 0 | } |
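
The core of the slidedown match above is rebasing both inputs' indices onto the
common source vector and then requiring a single contiguous run starting at a
non-zero index. A sketch of that check on plain integers, reproducing case b)
from the comment (illustrative helper name; undef entries are left out for
brevity):

#include <cstdio>

// Rebuild a two-input shuffle mask in terms of one common source, given the
// offsets at which each input was extracted, and return the vslidedown amount
// if the rebuilt mask is one contiguous run, or -1 otherwise.
static int matchSlidedownAmount(const int *Mask, unsigned Size,
                                unsigned V1Offset, unsigned V2Offset) {
  int First = -1;
  for (unsigned i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      return -1; // keep the sketch simple: no undefs
    int Rebased = (unsigned)M < Size ? M + (int)V1Offset
                                     : M - (int)Size + (int)V2Offset;
    if (i == 0) {
      if (Rebased <= 0)
        return -1; // slide amount must be known and non-zero
      First = Rebased;
    } else if (Rebased != First + (int)i) {
      return -1;   // not contiguous
    }
  }
  return First;
}

int main() {
  // Case b) above: V1 = t69[4..7], V2 = t69[8..11], mask <1,2,3,4> -> <5,6,7,8>.
  const int Mask[4] = {1, 2, 3, 4};
  std::printf("%d\n", matchSlidedownAmount(Mask, 4, 4, 8)); // 5
  return 0;
}
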
4365 | | |
4366 | | // Because vslideup leaves the destination elements at the start intact, we can |
4367 | | // use it to perform shuffles that insert subvectors: |
4368 | | // |
4369 | | // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11> |
4370 | | // -> |
4371 | | // vsetvli zero, 8, e8, mf2, ta, ma |
4372 | | // vslideup.vi v8, v9, 4 |
4373 | | // |
4374 | | // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7> |
4375 | | // -> |
4376 | | // vsetvli zero, 5, e8, mf2, tu, ma |
4377 | | // vslideup.vi v8, v9, 2
4378 | | static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, |
4379 | | SDValue V1, SDValue V2, |
4380 | | ArrayRef<int> Mask, |
4381 | | const RISCVSubtarget &Subtarget, |
4382 | 0 | SelectionDAG &DAG) { |
4383 | 0 | unsigned NumElts = VT.getVectorNumElements(); |
4384 | 0 | int NumSubElts, Index; |
4385 | 0 | if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts, |
4386 | 0 | Index)) |
4387 | 0 | return SDValue(); |
4388 | | |
4389 | 0 | bool OpsSwapped = Mask[Index] < (int)NumElts; |
4390 | 0 | SDValue InPlace = OpsSwapped ? V2 : V1; |
4391 | 0 | SDValue ToInsert = OpsSwapped ? V1 : V2; |
4392 | |
|
4393 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
4394 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4395 | 0 | auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first; |
4396 | | // We slide up by the index that the subvector is being inserted at, and set |
4397 | | // VL to the index + the number of elements being inserted. |
4398 | 0 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC; |
4399 | | // If we're adding a suffix to the in place vector, i.e. inserting right
4400 | | // up to the very end of it, then we don't actually care about the tail. |
4401 | 0 | if (NumSubElts + Index >= (int)NumElts) |
4402 | 0 | Policy |= RISCVII::TAIL_AGNOSTIC; |
4403 | |
4404 | 0 | InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget); |
4405 | 0 | ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget); |
4406 | 0 | SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT); |
4407 | |
4408 | 0 | SDValue Res; |
4409 | | // If we're inserting into the lowest elements, use a tail undisturbed |
4410 | | // vmv.v.v. |
4411 | 0 | if (Index == 0) |
4412 | 0 | Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert, |
4413 | 0 | VL); |
4414 | 0 | else |
4415 | 0 | Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert, |
4416 | 0 | DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy); |
4417 | 0 | return convertFromScalableVector(VT, Res, DAG, Subtarget); |
4418 | 0 | } |
4419 | | |
4420 | | /// Match v(f)slide1up/down idioms. These operations involve sliding |
4421 | | /// N-1 elements to make room for an inserted scalar at one end. |
4422 | | static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, |
4423 | | SDValue V1, SDValue V2, |
4424 | | ArrayRef<int> Mask, |
4425 | | const RISCVSubtarget &Subtarget, |
4426 | 0 | SelectionDAG &DAG) { |
4427 | 0 | bool OpsSwapped = false; |
4428 | 0 | if (!isa<BuildVectorSDNode>(V1)) { |
4429 | 0 | if (!isa<BuildVectorSDNode>(V2)) |
4430 | 0 | return SDValue(); |
4431 | 0 | std::swap(V1, V2); |
4432 | 0 | OpsSwapped = true; |
4433 | 0 | } |
4434 | 0 | SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue(); |
4435 | 0 | if (!Splat) |
4436 | 0 | return SDValue(); |
4437 | | |
4438 | | // Return true if the mask could describe a slide of Mask.size() - 1 |
4439 | | // elements from concat_vector(V1, V2)[Base:] to [Offset:]. |
4440 | 0 | auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) { |
4441 | 0 | const unsigned S = (Offset > 0) ? 0 : -Offset; |
4442 | 0 | const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0); |
4443 | 0 | for (unsigned i = S; i != E; ++i) |
4444 | 0 | if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset) |
4445 | 0 | return false; |
4446 | 0 | return true; |
4447 | 0 | }; |
4448 | |
4449 | 0 | const unsigned NumElts = VT.getVectorNumElements(); |
4450 | 0 | bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1); |
4451 | 0 | if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1)) |
4452 | 0 | return SDValue(); |
4453 | | |
4454 | 0 | const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0]; |
4455 | | // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4456 | 0 | if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped) |
4457 | 0 | return SDValue(); |
4458 | | |
4459 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4460 | 0 | auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
4461 | 0 | auto OpCode = IsVSlidedown ? |
4462 | 0 | (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) : |
4463 | 0 | (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL); |
4464 | 0 | if (!VT.isFloatingPoint()) |
4465 | 0 | Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat); |
4466 | 0 | auto Vec = DAG.getNode(OpCode, DL, ContainerVT, |
4467 | 0 | DAG.getUNDEF(ContainerVT), |
4468 | 0 | convertToScalableVector(ContainerVT, V2, DAG, Subtarget), |
4469 | 0 | Splat, TrueMask, VL); |
4470 | 0 | return convertFromScalableVector(VT, Vec, DAG, Subtarget); |
4471 | 0 | } |
4472 | | |
4473 | | // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx |
4474 | | // to create an interleaved vector of <[vscale x] n*2 x ty>. |
4475 | | // This requires that the size of ty is less than the subtarget's maximum ELEN. |
4476 | | static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, |
4477 | | const SDLoc &DL, SelectionDAG &DAG, |
4478 | 0 | const RISCVSubtarget &Subtarget) { |
4479 | 0 | MVT VecVT = EvenV.getSimpleValueType(); |
4480 | 0 | MVT VecContainerVT = VecVT; // <vscale x n x ty> |
4481 | | // Convert fixed vectors to scalable if needed |
4482 | 0 | if (VecContainerVT.isFixedLengthVector()) { |
4483 | 0 | VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget); |
4484 | 0 | EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget); |
4485 | 0 | OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget); |
4486 | 0 | } |
4487 | |
4488 | 0 | assert(VecVT.getScalarSizeInBits() < Subtarget.getELen()); |
4489 | | |
4490 | | // We're working with a vector of the same size as the resulting |
4491 | | // interleaved vector, but with half the number of elements and |
4492 | | // twice the SEW (Hence the restriction on not using the maximum |
4493 | | // ELEN) |
4494 | 0 | MVT WideVT = |
4495 | 0 | MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2), |
4496 | 0 | VecVT.getVectorElementCount()); |
4497 | 0 | MVT WideContainerVT = WideVT; // <vscale x n x ty*2> |
4498 | 0 | if (WideContainerVT.isFixedLengthVector()) |
4499 | 0 | WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget); |
4500 | | |
4501 | | // Bitcast the input vectors to integers in case they are FP |
4502 | 0 | VecContainerVT = VecContainerVT.changeTypeToInteger(); |
4503 | 0 | EvenV = DAG.getBitcast(VecContainerVT, EvenV); |
4504 | 0 | OddV = DAG.getBitcast(VecContainerVT, OddV); |
4505 | |
4506 | 0 | auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget); |
4507 | 0 | SDValue Passthru = DAG.getUNDEF(WideContainerVT); |
4508 | |
4509 | 0 | SDValue Interleaved; |
4510 | 0 | if (Subtarget.hasStdExtZvbb()) { |
4511 | | // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV. |
4512 | 0 | SDValue OffsetVec = |
4513 | 0 | DAG.getSplatVector(VecContainerVT, DL, |
4514 | 0 | DAG.getConstant(VecVT.getScalarSizeInBits(), DL, |
4515 | 0 | Subtarget.getXLenVT())); |
4516 | 0 | Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV, |
4517 | 0 | OffsetVec, Passthru, Mask, VL); |
4518 | 0 | Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT, |
4519 | 0 | Interleaved, EvenV, Passthru, Mask, VL); |
4520 | 0 | } else { |
4521 | | // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with |
4522 | | // vwaddu.vv |
4523 | 0 | Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV, |
4524 | 0 | OddV, Passthru, Mask, VL); |
4525 | | |
4526 | | // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1)
4527 | 0 | SDValue AllOnesVec = DAG.getSplatVector( |
4528 | 0 | VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT())); |
4529 | 0 | SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, |
4530 | 0 | OddV, AllOnesVec, Passthru, Mask, VL); |
4531 | | |
4532 | | // Add the two together so we get |
4533 | | // (OddV * 0xff...ff) + (OddV + EvenV) |
4534 | | // = (OddV * 0x100...00) + EvenV |
4535 | | // = (OddV << VecVT.getScalarSizeInBits()) + EvenV |
4536 | | // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4537 | 0 | Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, |
4538 | 0 | Interleaved, OddsMul, Passthru, Mask, VL); |
4539 | 0 | } |
4540 | | |
4541 | | // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty> |
4542 | 0 | MVT ResultContainerVT = MVT::getVectorVT( |
4543 | 0 | VecVT.getVectorElementType(), // Make sure to use original type |
4544 | 0 | VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2)); |
4545 | 0 | Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved); |
4546 | | |
4547 | | // Convert back to a fixed vector if needed |
4548 | 0 | MVT ResultVT = |
4549 | 0 | MVT::getVectorVT(VecVT.getVectorElementType(), |
4550 | 0 | VecVT.getVectorElementCount().multiplyCoefficientBy(2)); |
4551 | 0 | if (ResultVT.isFixedLengthVector()) |
4552 | 0 | Interleaved = |
4553 | 0 | convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget); |
4554 | |
4555 | 0 | return Interleaved; |
4556 | 0 | } |
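
The non-Zvbb path above relies on the identity
(Odd * (2^n - 1)) + (Odd + Even) == (Odd << n) + Even for zero-extended n-bit
inputs. A quick exhaustive check of that identity for n = 8:

#include <cstdint>
#include <cstdio>

int main() {
  // For 8-bit elements widened to 16 bits:
  //   vwaddu.vv  : Sum  = Even + Odd       (zero-extended)
  //   vwmaccu.vx : Sum += Odd * 0xff
  // which equals (Odd << 8) | Even, i.e. the interleaved pair.
  for (unsigned Even = 0; Even < 256; ++Even)
    for (unsigned Odd = 0; Odd < 256; ++Odd) {
      uint16_t ViaMacc = (uint16_t)((Even + Odd) + Odd * 0xffu);
      uint16_t Expected = (uint16_t)((Odd << 8) | Even);
      if (ViaMacc != Expected) {
        std::printf("mismatch at even=%u odd=%u\n", Even, Odd);
        return 1;
      }
    }
  std::printf("identity holds for all 8-bit pairs\n");
  return 0;
}
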
4557 | | |
4558 | | // If we have a vector of bits that we want to reverse, we can use a vbrev on a |
4559 | | // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse. |
4560 | | static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, |
4561 | | SelectionDAG &DAG, |
4562 | 0 | const RISCVSubtarget &Subtarget) { |
4563 | 0 | SDLoc DL(SVN); |
4564 | 0 | MVT VT = SVN->getSimpleValueType(0); |
4565 | 0 | SDValue V = SVN->getOperand(0); |
4566 | 0 | unsigned NumElts = VT.getVectorNumElements(); |
4567 | |
4568 | 0 | assert(VT.getVectorElementType() == MVT::i1); |
4569 | | |
4570 | 0 | if (!ShuffleVectorInst::isReverseMask(SVN->getMask(), |
4571 | 0 | SVN->getMask().size()) || |
4572 | 0 | !SVN->getOperand(1).isUndef()) |
4573 | 0 | return SDValue(); |
4574 | | |
4575 | 0 | unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts)); |
4576 | 0 | EVT ViaVT = EVT::getVectorVT( |
4577 | 0 | *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1); |
4578 | 0 | EVT ViaBitVT = |
4579 | 0 | EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits()); |
4580 | | |
4581 | | // If we don't have zvbb or the larger element type > ELEN, the operation will |
4582 | | // be illegal. |
4583 | 0 | if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE, |
4584 | 0 | ViaVT) || |
4585 | 0 | !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT)) |
4586 | 0 | return SDValue(); |
4587 | | |
4588 | | // If the bit vector doesn't fit exactly into the larger element type, we need |
4589 | | // to insert it into the larger vector and then shift up the reversed bits |
4590 | | // afterwards to get rid of the gap introduced. |
4591 | 0 | if (ViaEltSize > NumElts) |
4592 | 0 | V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT), |
4593 | 0 | V, DAG.getVectorIdxConstant(0, DL)); |
4594 | |
4595 | 0 | SDValue Res = |
4596 | 0 | DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V)); |
4597 | | |
4598 | | // Shift up the reversed bits if the vector didn't exactly fit into the larger |
4599 | | // element type. |
4600 | 0 | if (ViaEltSize > NumElts) |
4601 | 0 | Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res, |
4602 | 0 | DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT)); |
4603 | |
4604 | 0 | Res = DAG.getBitcast(ViaBitVT, Res); |
4605 | |
4606 | 0 | if (ViaEltSize > NumElts) |
4607 | 0 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
4608 | 0 | DAG.getVectorIdxConstant(0, DL)); |
4609 | 0 | return Res; |
4610 | 0 | } |
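
Reversing an i1 mask vector is the same as bit-reversing the integer it
bitcasts to; when the mask is narrower than the chosen element type, the
reversed bits land in the high part and have to be shifted back down, mirroring
the SRL above. A scalar sketch (illustrative helper name; NumBits is assumed to
be between 1 and 32):

#include <cstdint>
#include <cstdio>

// Reverse the low NumBits bits of V by bit-reversing a wider element and then
// shifting the result down to drop the gap.
static uint32_t reverseMaskBits(uint32_t V, unsigned NumBits) {
  // Plain 32-bit bit reversal (the scalar analogue of vbrev on one element).
  uint32_t R = 0;
  for (unsigned i = 0; i != 32; ++i)
    R |= ((V >> i) & 1u) << (31 - i);
  // The reversed mask now sits in the high bits; shift out the gap.
  return R >> (32 - NumBits);
}

int main() {
  // v4i1 <1,0,1,1> stored little-endian as 0b1101; reversed it is <1,1,0,1>,
  // i.e. 0b1011.
  std::printf("0x%x\n", reverseMaskBits(0xD, 4)); // 0xb
  return 0;
}
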
4611 | | |
4612 | | // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can |
4613 | | // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this |
4614 | | // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor. |
4615 | | static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, |
4616 | | SelectionDAG &DAG, |
4617 | 0 | const RISCVSubtarget &Subtarget) { |
4618 | 0 | SDLoc DL(SVN); |
4619 | |
4620 | 0 | EVT VT = SVN->getValueType(0); |
4621 | 0 | unsigned NumElts = VT.getVectorNumElements(); |
4622 | 0 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
4623 | 0 | unsigned NumSubElts, RotateAmt; |
4624 | 0 | if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2, |
4625 | 0 | NumElts, NumSubElts, RotateAmt)) |
4626 | 0 | return SDValue(); |
4627 | 0 | MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts), |
4628 | 0 | NumElts / NumSubElts); |
4629 | | |
4630 | | // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x. |
4631 | 0 | if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT)) |
4632 | 0 | return SDValue(); |
4633 | | |
4634 | 0 | SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0)); |
4635 | |
4636 | 0 | SDValue Rotate; |
4637 | | // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap, |
4638 | | // so canonicalize to vrev8. |
4639 | 0 | if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8) |
4640 | 0 | Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op); |
4641 | 0 | else |
4642 | 0 | Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op, |
4643 | 0 | DAG.getConstant(RotateAmt, DL, RotateVT)); |
4644 | |
4645 | 0 | return DAG.getBitcast(VT, Rotate); |
4646 | 0 | } |
4647 | | |
4648 | | static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, |
4649 | 0 | const RISCVSubtarget &Subtarget) { |
4650 | 0 | SDValue V1 = Op.getOperand(0); |
4651 | 0 | SDValue V2 = Op.getOperand(1); |
4652 | 0 | SDLoc DL(Op); |
4653 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
4654 | 0 | MVT VT = Op.getSimpleValueType(); |
4655 | 0 | unsigned NumElts = VT.getVectorNumElements(); |
4656 | 0 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); |
4657 | |
4658 | 0 | if (VT.getVectorElementType() == MVT::i1) { |
4659 | | // Lower to a vror.vi of a larger element type if possible before we promote |
4660 | | // i1s to i8s. |
4661 | 0 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
4662 | 0 | return V; |
4663 | 0 | if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget)) |
4664 | 0 | return V; |
4665 | | |
4666 | | // Promote i1 shuffle to i8 shuffle. |
4667 | 0 | MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()); |
4668 | 0 | V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1); |
4669 | 0 | V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT) |
4670 | 0 | : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2); |
4671 | 0 | SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask()); |
4672 | 0 | return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT), |
4673 | 0 | ISD::SETNE); |
4674 | 0 | } |
4675 | | |
4676 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
4677 | |
4678 | 0 | auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
4679 | |
4680 | 0 | if (SVN->isSplat()) { |
4681 | 0 | const int Lane = SVN->getSplatIndex(); |
4682 | 0 | if (Lane >= 0) { |
4683 | 0 | MVT SVT = VT.getVectorElementType(); |
4684 | | |
4685 | | // Turn splatted vector load into a strided load with an X0 stride. |
4686 | 0 | SDValue V = V1; |
4687 | | // Peek through CONCAT_VECTORS as VectorCombine can concat a vector |
4688 | | // with undef. |
4689 | | // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? |
4690 | 0 | int Offset = Lane; |
4691 | 0 | if (V.getOpcode() == ISD::CONCAT_VECTORS) { |
4692 | 0 | int OpElements = |
4693 | 0 | V.getOperand(0).getSimpleValueType().getVectorNumElements(); |
4694 | 0 | V = V.getOperand(Offset / OpElements); |
4695 | 0 | Offset %= OpElements; |
4696 | 0 | } |
4697 | | |
4698 | | // We need to ensure the load isn't atomic or volatile. |
4699 | 0 | if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { |
4700 | 0 | auto *Ld = cast<LoadSDNode>(V); |
4701 | 0 | Offset *= SVT.getStoreSize(); |
4702 | 0 | SDValue NewAddr = DAG.getMemBasePlusOffset( |
4703 | 0 | Ld->getBasePtr(), TypeSize::getFixed(Offset), DL); |
4704 | | |
4705 | | // If this is SEW=64 on RV32, use a strided load with a stride of x0. |
4706 | 0 | if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { |
4707 | 0 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
4708 | 0 | SDValue IntID = |
4709 | 0 | DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); |
4710 | 0 | SDValue Ops[] = {Ld->getChain(), |
4711 | 0 | IntID, |
4712 | 0 | DAG.getUNDEF(ContainerVT), |
4713 | 0 | NewAddr, |
4714 | 0 | DAG.getRegister(RISCV::X0, XLenVT), |
4715 | 0 | VL}; |
4716 | 0 | SDValue NewLoad = DAG.getMemIntrinsicNode( |
4717 | 0 | ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, |
4718 | 0 | DAG.getMachineFunction().getMachineMemOperand( |
4719 | 0 | Ld->getMemOperand(), Offset, SVT.getStoreSize())); |
4720 | 0 | DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); |
4721 | 0 | return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); |
4722 | 0 | } |
4723 | | |
4724 | | // Otherwise use a scalar load and splat. This will give the best |
4725 | | // opportunity to fold a splat into the operation. ISel can turn it into |
4726 | | // the x0 strided load if we aren't able to fold away the select. |
4727 | 0 | if (SVT.isFloatingPoint()) |
4728 | 0 | V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, |
4729 | 0 | Ld->getPointerInfo().getWithOffset(Offset), |
4730 | 0 | Ld->getOriginalAlign(), |
4731 | 0 | Ld->getMemOperand()->getFlags()); |
4732 | 0 | else |
4733 | 0 | V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, |
4734 | 0 | Ld->getPointerInfo().getWithOffset(Offset), SVT, |
4735 | 0 | Ld->getOriginalAlign(), |
4736 | 0 | Ld->getMemOperand()->getFlags()); |
4737 | 0 | DAG.makeEquivalentMemoryOrdering(Ld, V); |
4738 | |
4739 | 0 | unsigned Opc = |
4740 | 0 | VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; |
4741 | 0 | SDValue Splat = |
4742 | 0 | DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL); |
4743 | 0 | return convertFromScalableVector(VT, Splat, DAG, Subtarget); |
4744 | 0 | } |
4745 | | |
4746 | 0 | V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); |
4747 | 0 | assert(Lane < (int)NumElts && "Unexpected lane!"); |
4748 | 0 | SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, |
4749 | 0 | V1, DAG.getConstant(Lane, DL, XLenVT), |
4750 | 0 | DAG.getUNDEF(ContainerVT), TrueMask, VL); |
4751 | 0 | return convertFromScalableVector(VT, Gather, DAG, Subtarget); |
4752 | 0 | } |
4753 | 0 | } |
4754 | | |
4755 | 0 | ArrayRef<int> Mask = SVN->getMask(); |
4756 | |
4757 | 0 | if (SDValue V = |
4758 | 0 | lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
4759 | 0 | return V; |
4760 | | |
4761 | 0 | if (SDValue V = |
4762 | 0 | lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
4763 | 0 | return V; |
4764 | | |
4765 | | // A bitrotate will be one instruction on Zvkb, so try to lower to it first if |
4766 | | // available. |
4767 | 0 | if (Subtarget.hasStdExtZvkb()) |
4768 | 0 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
4769 | 0 | return V; |
4770 | | |
4771 | | // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may |
4772 | | // be undef which can be handled with a single SLIDEDOWN/UP. |
4773 | 0 | int LoSrc, HiSrc; |
4774 | 0 | int Rotation = isElementRotate(LoSrc, HiSrc, Mask); |
4775 | 0 | if (Rotation > 0) { |
4776 | 0 | SDValue LoV, HiV; |
4777 | 0 | if (LoSrc >= 0) { |
4778 | 0 | LoV = LoSrc == 0 ? V1 : V2; |
4779 | 0 | LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget); |
4780 | 0 | } |
4781 | 0 | if (HiSrc >= 0) { |
4782 | 0 | HiV = HiSrc == 0 ? V1 : V2; |
4783 | 0 | HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget); |
4784 | 0 | } |
4785 | | |
4786 | | // We found a rotation. We need to slide HiV down by Rotation. Then we need |
4787 | | // to slide LoV up by (NumElts - Rotation). |
4788 | 0 | unsigned InvRotate = NumElts - Rotation; |
4789 | |
4790 | 0 | SDValue Res = DAG.getUNDEF(ContainerVT); |
4791 | 0 | if (HiV) { |
4792 | | // Even though we could use a smaller VL, don't to avoid a vsetivli |
4793 | | // toggle. |
4794 | 0 | Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV, |
4795 | 0 | DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL); |
4796 | 0 | } |
4797 | 0 | if (LoV) |
4798 | 0 | Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV, |
4799 | 0 | DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL, |
4800 | 0 | RISCVII::TAIL_AGNOSTIC); |
4801 | |
4802 | 0 | return convertFromScalableVector(VT, Res, DAG, Subtarget); |
4803 | 0 | } |
4804 | | |
4805 | | // If this is a deinterleave and we can widen the vector, then we can use |
4806 | | // vnsrl to deinterleave. |
4807 | 0 | if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) { |
4808 | 0 | return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0, |
4809 | 0 | Subtarget, DAG); |
4810 | 0 | } |
4811 | | |
4812 | 0 | if (SDValue V = |
4813 | 0 | lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
4814 | 0 | return V; |
4815 | | |
4816 | | // Detect an interleave shuffle and lower to |
4817 | | // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) |
4818 | 0 | int EvenSrc, OddSrc; |
4819 | 0 | if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) { |
4820 | | // Extract the halves of the vectors. |
4821 | 0 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
4822 | |
4823 | 0 | int Size = Mask.size(); |
4824 | 0 | SDValue EvenV, OddV; |
4825 | 0 | assert(EvenSrc >= 0 && "Undef source?"); |
4826 | 0 | EvenV = (EvenSrc / Size) == 0 ? V1 : V2; |
4827 | 0 | EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV, |
4828 | 0 | DAG.getConstant(EvenSrc % Size, DL, XLenVT)); |
4829 | |
4830 | 0 | assert(OddSrc >= 0 && "Undef source?"); |
4831 | 0 | OddV = (OddSrc / Size) == 0 ? V1 : V2; |
4832 | 0 | OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV, |
4833 | 0 | DAG.getConstant(OddSrc % Size, DL, XLenVT)); |
4834 | |
4835 | 0 | return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget); |
4836 | 0 | } |
4837 | | |
4838 | | // Detect shuffles which can be re-expressed as vector selects; these are |
4839 | | // shuffles in which each element in the destination is taken from an element |
4840 | | // at the corresponding index in either source vector. |
4841 | 0 | bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) { |
4842 | 0 | int MaskIndex = MaskIdx.value(); |
4843 | 0 | return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; |
4844 | 0 | }); |
4845 | |
4846 | 0 | assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); |
4847 | | |
4848 | 0 | SmallVector<SDValue> MaskVals; |
4849 | | // As a backup, shuffles can be lowered via a vrgather instruction, possibly |
4850 | | // merged with a second vrgather. |
4851 | 0 | SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; |
4852 | | |
4853 | | // By default we preserve the original operand order, and use a mask to |
4854 | | // select LHS as true and RHS as false. However, since RVV vector selects may |
4855 | | // feature splats but only on the LHS, we may choose to invert our mask and |
4856 | | // instead select between RHS and LHS. |
4857 | 0 | bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); |
4858 | 0 | bool InvertMask = IsSelect == SwapOps; |
4859 | | |
4860 | | // Keep track of which non-undef indices are used by each LHS/RHS shuffle |
4861 | | // half. |
4862 | 0 | DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; |
4863 | | |
4864 | | // Now construct the mask that will be used by the vselect or blended |
4865 | | // vrgather operation. For vrgathers, construct the appropriate indices into |
4866 | | // each vector. |
4867 | 0 | for (int MaskIndex : Mask) { |
4868 | 0 | bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask; |
4869 | 0 | MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); |
4870 | 0 | if (!IsSelect) { |
4871 | 0 | bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; |
4872 | 0 | GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 |
4873 | 0 | ? DAG.getConstant(MaskIndex, DL, XLenVT) |
4874 | 0 | : DAG.getUNDEF(XLenVT)); |
4875 | 0 | GatherIndicesRHS.push_back( |
4876 | 0 | IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) |
4877 | 0 | : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); |
4878 | 0 | if (IsLHSOrUndefIndex && MaskIndex >= 0) |
4879 | 0 | ++LHSIndexCounts[MaskIndex]; |
4880 | 0 | if (!IsLHSOrUndefIndex) |
4881 | 0 | ++RHSIndexCounts[MaskIndex - NumElts]; |
4882 | 0 | } |
4883 | 0 | } |
4884 | |
4885 | 0 | if (SwapOps) { |
4886 | 0 | std::swap(V1, V2); |
4887 | 0 | std::swap(GatherIndicesLHS, GatherIndicesRHS); |
4888 | 0 | } |
4889 | |
4890 | 0 | assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); |
4891 | 0 | MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
4892 | 0 | SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); |
4893 | |
4894 | 0 | if (IsSelect) |
4895 | 0 | return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); |
4896 | | |
4897 | | // We might be able to express the shuffle as a bitrotate. But even if we |
4898 | | // don't have Zvkb and have to expand, the expanded sequence of approx. 2 |
4899 | | // shifts and a vor will have a higher throughput than a vrgather. |
4900 | 0 | if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget)) |
4901 | 0 | return V; |
4902 | | |
4903 | 0 | if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) { |
4904 | | // On such a large vector we're unable to use i8 as the index type. |
4905 | | // FIXME: We could promote the index to i16 and use vrgatherei16, but that |
4906 | | // may involve vector splitting if we're already at LMUL=8, or our |
4907 | | // user-supplied maximum fixed-length LMUL. |
4908 | 0 | return SDValue(); |
4909 | 0 | } |
4910 | | |
4911 | 0 | unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; |
4912 | 0 | unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; |
4913 | 0 | MVT IndexVT = VT.changeTypeToInteger(); |
4914 | | // Since we can't introduce illegal index types at this stage, use i16 and |
4915 | | // vrgatherei16 if the corresponding index type for plain vrgather is greater |
4916 | | // than XLenVT. |
4917 | 0 | if (IndexVT.getScalarType().bitsGT(XLenVT)) { |
4918 | 0 | GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; |
4919 | 0 | IndexVT = IndexVT.changeVectorElementType(MVT::i16); |
4920 | 0 | } |
4921 | | |
4922 | | // If the mask allows, we can do all the index computation in 16 bits. This |
4923 | | // requires less work and less register pressure at high LMUL, and creates |
4924 | | // smaller constants which may be cheaper to materialize. |
4925 | 0 | if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) && |
4926 | 0 | (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) { |
4927 | 0 | GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; |
4928 | 0 | IndexVT = IndexVT.changeVectorElementType(MVT::i16); |
4929 | 0 | } |
4930 | |
4931 | 0 | MVT IndexContainerVT = |
4932 | 0 | ContainerVT.changeVectorElementType(IndexVT.getScalarType()); |
4933 | |
4934 | 0 | SDValue Gather; |
4935 | | // TODO: This doesn't trigger for i64 vectors on RV32, since there we |
4936 | | // encounter a bitcasted BUILD_VECTOR with low/high i32 values. |
4937 | 0 | if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { |
4938 | 0 | Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, |
4939 | 0 | Subtarget); |
4940 | 0 | } else { |
4941 | 0 | V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); |
4942 | | // If only one index is used, we can use a "splat" vrgather. |
4943 | | // TODO: We can splat the most-common index and fix-up any stragglers, if |
4944 | | // that's beneficial. |
4945 | 0 | if (LHSIndexCounts.size() == 1) { |
4946 | 0 | int SplatIndex = LHSIndexCounts.begin()->getFirst(); |
4947 | 0 | Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, |
4948 | 0 | DAG.getConstant(SplatIndex, DL, XLenVT), |
4949 | 0 | DAG.getUNDEF(ContainerVT), TrueMask, VL); |
4950 | 0 | } else { |
4951 | 0 | SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); |
4952 | 0 | LHSIndices = |
4953 | 0 | convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); |
4954 | |
4955 | 0 | Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, |
4956 | 0 | DAG.getUNDEF(ContainerVT), TrueMask, VL); |
4957 | 0 | } |
4958 | 0 | } |
4959 | | |
4960 | | // If a second vector operand is used by this shuffle, blend it in with an |
4961 | | // additional vrgather. |
4962 | 0 | if (!V2.isUndef()) { |
4963 | 0 | V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); |
4964 | |
4965 | 0 | MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); |
4966 | 0 | SelectMask = |
4967 | 0 | convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); |
4968 | | |
4969 | | // If only one index is used, we can use a "splat" vrgather. |
4970 | | // TODO: We can splat the most-common index and fix-up any stragglers, if |
4971 | | // that's beneficial. |
4972 | 0 | if (RHSIndexCounts.size() == 1) { |
4973 | 0 | int SplatIndex = RHSIndexCounts.begin()->getFirst(); |
4974 | 0 | Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, |
4975 | 0 | DAG.getConstant(SplatIndex, DL, XLenVT), Gather, |
4976 | 0 | SelectMask, VL); |
4977 | 0 | } else { |
4978 | 0 | SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); |
4979 | 0 | RHSIndices = |
4980 | 0 | convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); |
4981 | 0 | Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, |
4982 | 0 | SelectMask, VL); |
4983 | 0 | } |
4984 | 0 | } |
4985 | |
4986 | 0 | return convertFromScalableVector(VT, Gather, DAG, Subtarget); |
4987 | 0 | } |
4988 | | |
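As an aside on the select-shuffle test used in the lowering above: a mask is turned into a plain vselect only when every destination lane i is drawn from lane i of one of the two sources; anything else falls through to the vrgather paths. A minimal standalone sketch of that predicate, using a plain std::vector<int> mask instead of SelectionDAG nodes (the helper name is illustrative, not part of the backend):

#include <vector>

// Mirrors the IsSelect check above: lane i may be undef (negative), lane i of
// V1 (index i), or lane i of V2 (index i + NumElts).
static bool isSelectLikeMask(const std::vector<int> &Mask) {
  int NumElts = static_cast<int>(Mask.size());
  for (int i = 0; i < NumElts; ++i) {
    int M = Mask[i];
    if (M >= 0 && M % NumElts != i)
      return false; // this lane needs data from another position: use vrgather
  }
  return true;
}

// Example: over 4 elements, {0, 5, 2, 7} picks {V1[0], V2[1], V1[2], V2[3]}
// and is select-like, while {1, 0, 3, 2} is not.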
4989 | 0 | bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { |
4990 | | // Support splats for any type. These should type legalize well. |
4991 | 0 | if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) |
4992 | 0 | return true; |
4993 | | |
4994 | | // Only support legal VTs for other shuffles for now. |
4995 | 0 | if (!isTypeLegal(VT)) |
4996 | 0 | return false; |
4997 | | |
4998 | 0 | MVT SVT = VT.getSimpleVT(); |
4999 | | |
5000 | | // Not for i1 vectors. |
5001 | 0 | if (SVT.getScalarType() == MVT::i1) |
5002 | 0 | return false; |
5003 | | |
5004 | 0 | int Dummy1, Dummy2; |
5005 | 0 | return (isElementRotate(Dummy1, Dummy2, M) > 0) || |
5006 | 0 | isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget); |
5007 | 0 | } |
5008 | | |
5009 | | // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting |
5010 | | // the exponent. |
5011 | | SDValue |
5012 | | RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, |
5013 | 0 | SelectionDAG &DAG) const { |
5014 | 0 | MVT VT = Op.getSimpleValueType(); |
5015 | 0 | unsigned EltSize = VT.getScalarSizeInBits(); |
5016 | 0 | SDValue Src = Op.getOperand(0); |
5017 | 0 | SDLoc DL(Op); |
5018 | 0 | MVT ContainerVT = VT; |
5019 | |
5020 | 0 | SDValue Mask, VL; |
5021 | 0 | if (Op->isVPOpcode()) { |
5022 | 0 | Mask = Op.getOperand(1); |
5023 | 0 | if (VT.isFixedLengthVector()) |
5024 | 0 | Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, |
5025 | 0 | Subtarget); |
5026 | 0 | VL = Op.getOperand(2); |
5027 | 0 | } |
5028 | | |
5029 | | // We choose FP type that can represent the value if possible. Otherwise, we |
5030 | | // use rounding to zero conversion for correct exponent of the result. |
5031 | | // TODO: Use f16 for i8 when possible? |
5032 | 0 | MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32; |
5033 | 0 | if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()))) |
5034 | 0 | FloatEltVT = MVT::f32; |
5035 | 0 | MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); |
5036 | | |
5037 | | // Legal types should have been checked in the RISCVTargetLowering |
5038 | | // constructor. |
5039 | | // TODO: Splitting may make sense in some cases. |
5040 | 0 | assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && |
5041 | 0 | "Expected legal float type!"); |
5042 | | |
5043 | | // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. |
5044 | | // The trailing zero count is equal to log2 of this single bit value. |
5045 | 0 | if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { |
5046 | 0 | SDValue Neg = DAG.getNegative(Src, DL, VT); |
5047 | 0 | Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg); |
5048 | 0 | } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) { |
5049 | 0 | SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT), |
5050 | 0 | Src, Mask, VL); |
5051 | 0 | Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL); |
5052 | 0 | } |
5053 | | |
5054 | | // We have a legal FP type, convert to it. |
5055 | 0 | SDValue FloatVal; |
5056 | 0 | if (FloatVT.bitsGT(VT)) { |
5057 | 0 | if (Op->isVPOpcode()) |
5058 | 0 | FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL); |
5059 | 0 | else |
5060 | 0 | FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src); |
5061 | 0 | } else { |
5062 | | // Use RTZ to avoid rounding influencing exponent of FloatVal. |
5063 | 0 | if (VT.isFixedLengthVector()) { |
5064 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
5065 | 0 | Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); |
5066 | 0 | } |
5067 | 0 | if (!Op->isVPOpcode()) |
5068 | 0 | std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
5069 | 0 | SDValue RTZRM = |
5070 | 0 | DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()); |
5071 | 0 | MVT ContainerFloatVT = |
5072 | 0 | MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount()); |
5073 | 0 | FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT, |
5074 | 0 | Src, Mask, RTZRM, VL); |
5075 | 0 | if (VT.isFixedLengthVector()) |
5076 | 0 | FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget); |
5077 | 0 | } |
5078 | | // Bitcast to integer and shift the exponent to the LSB. |
5079 | 0 | EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); |
5080 | 0 | SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal); |
5081 | 0 | unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; |
5082 | |
5083 | 0 | SDValue Exp; |
5084 | | // Restore back to original type. Truncation after SRL is to generate vnsrl. |
5085 | 0 | if (Op->isVPOpcode()) { |
5086 | 0 | Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast, |
5087 | 0 | DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL); |
5088 | 0 | Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL); |
5089 | 0 | } else { |
5090 | 0 | Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast, |
5091 | 0 | DAG.getConstant(ShiftAmt, DL, IntVT)); |
5092 | 0 | if (IntVT.bitsLT(VT)) |
5093 | 0 | Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp); |
5094 | 0 | else if (IntVT.bitsGT(VT)) |
5095 | 0 | Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp); |
5096 | 0 | } |
5097 | | |
5098 | | // The exponent contains log2 of the value in biased form. |
5099 | 0 | unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; |
5100 | | // For trailing zeros, we just need to subtract the bias. |
5101 | 0 | if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) |
5102 | 0 | return DAG.getNode(ISD::SUB, DL, VT, Exp, |
5103 | 0 | DAG.getConstant(ExponentBias, DL, VT)); |
5104 | 0 | if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) |
5105 | 0 | return DAG.getNode(ISD::VP_SUB, DL, VT, Exp, |
5106 | 0 | DAG.getConstant(ExponentBias, DL, VT), Mask, VL); |
5107 | | |
5108 | | // For leading zeros, we need to remove the bias and convert from log2 to |
5109 | | // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). |
5110 | 0 | unsigned Adjust = ExponentBias + (EltSize - 1); |
5111 | 0 | SDValue Res; |
5112 | 0 | if (Op->isVPOpcode()) |
5113 | 0 | Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp, |
5114 | 0 | Mask, VL); |
5115 | 0 | else |
5116 | 0 | Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp); |
5117 | | |
5118 | | // The above result with a zero input equals Adjust, which is greater than |
5119 | | // EltSize. Hence, we can do min(Res, EltSize) for CTLZ. |
5120 | 0 | if (Op.getOpcode() == ISD::CTLZ) |
5121 | 0 | Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT)); |
5122 | 0 | else if (Op.getOpcode() == ISD::VP_CTLZ) |
5123 | 0 | Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res, |
5124 | 0 | DAG.getConstant(EltSize, DL, VT), Mask, VL); |
5125 | 0 | return Res; |
5126 | 0 | } |
5127 | | |
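The scalar intuition behind lowerCTLZ_CTTZ_ZERO_UNDEF can be checked in isolation: X & -X isolates the lowest set bit, that power of two converts to float exactly, and the biased exponent field is then 127 + log2, i.e. 127 + cttz. A small self-contained sketch under those assumptions (i32/f32 only; nothing here is backend API):

#include <cassert>
#include <cstdint>
#include <cstring>

static unsigned cttzViaFloatExponent(uint32_t X) {
  assert(X != 0 && "CTTZ_ZERO_UNDEF leaves the zero case undefined");
  uint32_t LowBit = X & -X;             // isolate the lowest set bit
  float F = static_cast<float>(LowBit); // exact: LowBit is a power of two
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  unsigned Exp = Bits >> 23;            // shift the exponent field to the LSB
  return Exp - 127;                     // remove the IEEE-754 bias
}

// For leading zeros the same exponent is subtracted from (127 + 31) instead,
// matching the Adjust = ExponentBias + (EltSize - 1) computation above.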
5128 | | // While RVV has alignment restrictions, we should always be able to load as a |
5129 | | // legal equivalently-sized byte-typed vector instead. This method is |
5130 | | // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If |
5131 | | // the load is already correctly-aligned, it returns SDValue(). |
5132 | | SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, |
5133 | 0 | SelectionDAG &DAG) const { |
5134 | 0 | auto *Load = cast<LoadSDNode>(Op); |
5135 | 0 | assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); |
5136 | | |
5137 | 0 | if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
5138 | 0 | Load->getMemoryVT(), |
5139 | 0 | *Load->getMemOperand())) |
5140 | 0 | return SDValue(); |
5141 | | |
5142 | 0 | SDLoc DL(Op); |
5143 | 0 | MVT VT = Op.getSimpleValueType(); |
5144 | 0 | unsigned EltSizeBits = VT.getScalarSizeInBits(); |
5145 | 0 | assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && |
5146 | 0 | "Unexpected unaligned RVV load type"); |
5147 | 0 | MVT NewVT = |
5148 | 0 | MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); |
5149 | 0 | assert(NewVT.isValid() && |
5150 | 0 | "Expecting equally-sized RVV vector types to be legal"); |
5151 | 0 | SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), |
5152 | 0 | Load->getPointerInfo(), Load->getOriginalAlign(), |
5153 | 0 | Load->getMemOperand()->getFlags()); |
5154 | 0 | return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); |
5155 | 0 | } |
5156 | | |
5157 | | // While RVV has alignment restrictions, we should always be able to store as a |
5158 | | // legal equivalently-sized byte-typed vector instead. This method is |
5159 | | // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It |
5160 | | // returns SDValue() if the store is already correctly aligned. |
5161 | | SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, |
5162 | 0 | SelectionDAG &DAG) const { |
5163 | 0 | auto *Store = cast<StoreSDNode>(Op); |
5164 | 0 | assert(Store && Store->getValue().getValueType().isVector() && |
5165 | 0 | "Expected vector store"); |
5166 | | |
5167 | 0 | if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
5168 | 0 | Store->getMemoryVT(), |
5169 | 0 | *Store->getMemOperand())) |
5170 | 0 | return SDValue(); |
5171 | | |
5172 | 0 | SDLoc DL(Op); |
5173 | 0 | SDValue StoredVal = Store->getValue(); |
5174 | 0 | MVT VT = StoredVal.getSimpleValueType(); |
5175 | 0 | unsigned EltSizeBits = VT.getScalarSizeInBits(); |
5176 | 0 | assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && |
5177 | 0 | "Unexpected unaligned RVV store type"); |
5178 | 0 | MVT NewVT = |
5179 | 0 | MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); |
5180 | 0 | assert(NewVT.isValid() && |
5181 | 0 | "Expecting equally-sized RVV vector types to be legal"); |
5182 | 0 | StoredVal = DAG.getBitcast(NewVT, StoredVal); |
5183 | 0 | return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), |
5184 | 0 | Store->getPointerInfo(), Store->getOriginalAlign(), |
5185 | 0 | Store->getMemOperand()->getFlags()); |
5186 | 0 | } |
5187 | | |
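The retyping performed by the two helpers above is simple arithmetic on the vector shape: keep the total width, switch to i8 elements, and the access no longer has an alignment requirement above one byte. A toy model of just that computation (illustrative types, not LLVM's MVT API):

// A v4i32 access at align 1 becomes a v16i8 access plus a bitcast back.
struct VecShape {
  unsigned NumElts;
  unsigned EltSizeBits;
};

static VecShape retypeToByteVector(VecShape V) {
  // Same total number of bits, expressed as byte elements.
  return {V.NumElts * (V.EltSizeBits / 8), 8};
}

// retypeToByteVector({4, 32}) yields {16, 8}; so does {2, 64}.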
5188 | | static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, |
5189 | 141k | const RISCVSubtarget &Subtarget) { |
5190 | 141k | assert(Op.getValueType() == MVT::i64 && "Unexpected VT"); |
5191 | | |
5192 | 0 | int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue(); |
5193 | | |
5194 | | // All simm32 constants should be handled by isel. |
5195 | | // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making |
5196 | | // this check redundant, but small immediates are common so this check |
5197 | | // should have better compile time. |
5198 | 141k | if (isInt<32>(Imm)) |
5199 | 125k | return Op; |
5200 | | |
5201 | | // We only need to cost the immediate, if constant pool lowering is enabled. |
5202 | 16.0k | if (!Subtarget.useConstantPoolForLargeInts()) |
5203 | 0 | return Op; |
5204 | | |
5205 | 16.0k | RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); |
5206 | 16.0k | if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) |
5207 | 16.0k | return Op; |
5208 | | |
5209 | | // Optimizations below are disabled for opt size. If we're optimizing for |
5210 | | // size, use a constant pool. |
5211 | 5 | if (DAG.shouldOptForSize()) |
5212 | 0 | return SDValue(); |
5213 | | |
5214 | | // Special case. See if we can build the constant as (ADD (SLLI X, C), X); |
5215 | | // do that if it will avoid a constant pool. |
5216 | | // It will require an extra temporary register though. |
5217 | | // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where |
5218 | | // low and high 32 bits are the same and bit 31 and 63 are set. |
5219 | 5 | unsigned ShiftAmt, AddOpc; |
5220 | 5 | RISCVMatInt::InstSeq SeqLo = |
5221 | 5 | RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); |
5222 | 5 | if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost()) |
5223 | 2 | return Op; |
5224 | | |
5225 | 3 | return SDValue(); |
5226 | 5 | } |
5227 | | |
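One shape lowerConstant leaves to the two-register path is a 64-bit immediate whose two 32-bit halves are identical: the half is materialized once and combined with a shift and an add (add.uw from Zba when bits 31 and 63 are set, as the comment above notes). A tiny illustrative check for that shape, with a hypothetical name not taken from RISCVMatInt:

#include <cstdint>

static bool hasRepeatedHalves(uint64_t Imm) {
  return static_cast<uint32_t>(Imm) == static_cast<uint32_t>(Imm >> 32);
}

// hasRepeatedHalves(0xdeadbeefdeadbeefULL) is true: build 0xdeadbeef once,
// then fold in a copy shifted left by 32 with one extra add or add.uw.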
5228 | | static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, |
5229 | 0 | const RISCVSubtarget &Subtarget) { |
5230 | 0 | SDLoc dl(Op); |
5231 | 0 | AtomicOrdering FenceOrdering = |
5232 | 0 | static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); |
5233 | 0 | SyncScope::ID FenceSSID = |
5234 | 0 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); |
5235 | |
5236 | 0 | if (Subtarget.hasStdExtZtso()) { |
5237 | | // The only fence that needs an instruction is a sequentially-consistent |
5238 | | // cross-thread fence. |
5239 | 0 | if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && |
5240 | 0 | FenceSSID == SyncScope::System) |
5241 | 0 | return Op; |
5242 | | |
5243 | | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
5244 | 0 | return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); |
5245 | 0 | } |
5246 | | |
5247 | | // singlethread fences only synchronize with signal handlers on the same |
5248 | | // thread and thus only need to preserve instruction order, not actually |
5249 | | // enforce memory ordering. |
5250 | 0 | if (FenceSSID == SyncScope::SingleThread) |
5251 | | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
5252 | 0 | return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); |
5253 | | |
5254 | 0 | return Op; |
5255 | 0 | } |
5256 | | |
5257 | | SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op, |
5258 | 0 | SelectionDAG &DAG) const { |
5259 | 0 | SDLoc DL(Op); |
5260 | 0 | MVT VT = Op.getSimpleValueType(); |
5261 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
5262 | 0 | unsigned Check = Op.getConstantOperandVal(1); |
5263 | 0 | unsigned TDCMask = 0; |
5264 | 0 | if (Check & fcSNan) |
5265 | 0 | TDCMask |= RISCV::FPMASK_Signaling_NaN; |
5266 | 0 | if (Check & fcQNan) |
5267 | 0 | TDCMask |= RISCV::FPMASK_Quiet_NaN; |
5268 | 0 | if (Check & fcPosInf) |
5269 | 0 | TDCMask |= RISCV::FPMASK_Positive_Infinity; |
5270 | 0 | if (Check & fcNegInf) |
5271 | 0 | TDCMask |= RISCV::FPMASK_Negative_Infinity; |
5272 | 0 | if (Check & fcPosNormal) |
5273 | 0 | TDCMask |= RISCV::FPMASK_Positive_Normal; |
5274 | 0 | if (Check & fcNegNormal) |
5275 | 0 | TDCMask |= RISCV::FPMASK_Negative_Normal; |
5276 | 0 | if (Check & fcPosSubnormal) |
5277 | 0 | TDCMask |= RISCV::FPMASK_Positive_Subnormal; |
5278 | 0 | if (Check & fcNegSubnormal) |
5279 | 0 | TDCMask |= RISCV::FPMASK_Negative_Subnormal; |
5280 | 0 | if (Check & fcPosZero) |
5281 | 0 | TDCMask |= RISCV::FPMASK_Positive_Zero; |
5282 | 0 | if (Check & fcNegZero) |
5283 | 0 | TDCMask |= RISCV::FPMASK_Negative_Zero; |
5284 | |
5285 | 0 | bool IsOneBitMask = isPowerOf2_32(TDCMask); |
5286 | |
5287 | 0 | SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT); |
5288 | |
5289 | 0 | if (VT.isVector()) { |
5290 | 0 | SDValue Op0 = Op.getOperand(0); |
5291 | 0 | MVT VT0 = Op.getOperand(0).getSimpleValueType(); |
5292 | |
5293 | 0 | if (VT.isScalableVector()) { |
5294 | 0 | MVT DstVT = VT0.changeVectorElementTypeToInteger(); |
5295 | 0 | auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget); |
5296 | 0 | if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { |
5297 | 0 | Mask = Op.getOperand(2); |
5298 | 0 | VL = Op.getOperand(3); |
5299 | 0 | } |
5300 | 0 | SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask, |
5301 | 0 | VL, Op->getFlags()); |
5302 | 0 | if (IsOneBitMask) |
5303 | 0 | return DAG.getSetCC(DL, VT, FPCLASS, |
5304 | 0 | DAG.getConstant(TDCMask, DL, DstVT), |
5305 | 0 | ISD::CondCode::SETEQ); |
5306 | 0 | SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS, |
5307 | 0 | DAG.getConstant(TDCMask, DL, DstVT)); |
5308 | 0 | return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT), |
5309 | 0 | ISD::SETNE); |
5310 | 0 | } |
5311 | | |
5312 | 0 | MVT ContainerVT0 = getContainerForFixedLengthVector(VT0); |
5313 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
5314 | 0 | MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger(); |
5315 | 0 | auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget); |
5316 | 0 | if (Op.getOpcode() == ISD::VP_IS_FPCLASS) { |
5317 | 0 | Mask = Op.getOperand(2); |
5318 | 0 | MVT MaskContainerVT = |
5319 | 0 | getContainerForFixedLengthVector(Mask.getSimpleValueType()); |
5320 | 0 | Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); |
5321 | 0 | VL = Op.getOperand(3); |
5322 | 0 | } |
5323 | 0 | Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget); |
5324 | |
5325 | 0 | SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0, |
5326 | 0 | Mask, VL, Op->getFlags()); |
5327 | |
5328 | 0 | TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, |
5329 | 0 | DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL); |
5330 | 0 | if (IsOneBitMask) { |
5331 | 0 | SDValue VMSEQ = |
5332 | 0 | DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, |
5333 | 0 | {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ), |
5334 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL}); |
5335 | 0 | return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget); |
5336 | 0 | } |
5337 | 0 | SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS, |
5338 | 0 | TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL); |
5339 | |
5340 | 0 | SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); |
5341 | 0 | SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT, |
5342 | 0 | DAG.getUNDEF(ContainerDstVT), SplatZero, VL); |
5343 | |
5344 | 0 | SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, |
5345 | 0 | {AND, SplatZero, DAG.getCondCode(ISD::SETNE), |
5346 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL}); |
5347 | 0 | return convertFromScalableVector(VT, VMSNE, DAG, Subtarget); |
5348 | 0 | } |
5349 | | |
5350 | 0 | SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0)); |
5351 | 0 | SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV); |
5352 | 0 | SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT), |
5353 | 0 | ISD::CondCode::SETNE); |
5354 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); |
5355 | 0 | } |
5356 | | |
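In the vector paths of LowerIS_FPCLASS above, the result of fclass always has exactly one bit set, so a plain equality compare suffices when the requested class set maps to a single bit (IsOneBitMask); otherwise an AND followed by a compare with zero is emitted, which is also what the scalar tail always does. A compact sketch of that selection (FClassBits stands in for the instruction's result; this is not backend code):

#include <cstdint>

static bool testFPClass(uint32_t FClassBits, uint32_t TDCMask) {
  bool IsOneBitMask = TDCMask != 0 && (TDCMask & (TDCMask - 1)) == 0;
  if (IsOneBitMask)
    return FClassBits == TDCMask;     // single class requested: SETEQ is enough
  return (FClassBits & TDCMask) != 0; // general case: AND, then SETNE with 0
}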
5357 | | // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these |
5358 | | // operations propagate nans. |
5359 | | static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, |
5360 | 0 | const RISCVSubtarget &Subtarget) { |
5361 | 0 | SDLoc DL(Op); |
5362 | 0 | MVT VT = Op.getSimpleValueType(); |
5363 | |
5364 | 0 | SDValue X = Op.getOperand(0); |
5365 | 0 | SDValue Y = Op.getOperand(1); |
5366 | |
5367 | 0 | if (!VT.isVector()) { |
5368 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
5369 | | |
5370 | | // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This |
5371 | | // ensures that when one input is a nan, the other will also be a nan |
5372 | | // allowing the nan to propagate. If both inputs are nan, this will swap the |
5373 | | // inputs which is harmless. |
5374 | |
5375 | 0 | SDValue NewY = Y; |
5376 | 0 | if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) { |
5377 | 0 | SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); |
5378 | 0 | NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); |
5379 | 0 | } |
5380 | |
5381 | 0 | SDValue NewX = X; |
5382 | 0 | if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) { |
5383 | 0 | SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); |
5384 | 0 | NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); |
5385 | 0 | } |
5386 | |
5387 | 0 | unsigned Opc = |
5388 | 0 | Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; |
5389 | 0 | return DAG.getNode(Opc, DL, VT, NewX, NewY); |
5390 | 0 | } |
5391 | | |
5392 | | // Check for NaNs before converting the fixed-length vectors to scalable form. |
5393 | 0 | bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X); |
5394 | 0 | bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y); |
5395 | |
5396 | 0 | MVT ContainerVT = VT; |
5397 | 0 | if (VT.isFixedLengthVector()) { |
5398 | 0 | ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); |
5399 | 0 | X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); |
5400 | 0 | Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget); |
5401 | 0 | } |
5402 | |
5403 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
5404 | |
5405 | 0 | SDValue NewY = Y; |
5406 | 0 | if (!XIsNeverNan) { |
5407 | 0 | SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), |
5408 | 0 | {X, X, DAG.getCondCode(ISD::SETOEQ), |
5409 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL}); |
5410 | 0 | NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X, |
5411 | 0 | DAG.getUNDEF(ContainerVT), VL); |
5412 | 0 | } |
5413 | |
5414 | 0 | SDValue NewX = X; |
5415 | 0 | if (!YIsNeverNan) { |
5416 | 0 | SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), |
5417 | 0 | {Y, Y, DAG.getCondCode(ISD::SETOEQ), |
5418 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL}); |
5419 | 0 | NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y, |
5420 | 0 | DAG.getUNDEF(ContainerVT), VL); |
5421 | 0 | } |
5422 | |
5423 | 0 | unsigned Opc = |
5424 | 0 | Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL; |
5425 | 0 | SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY, |
5426 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL); |
5427 | 0 | if (VT.isFixedLengthVector()) |
5428 | 0 | Res = convertFromScalableVector(VT, Res, DAG, Subtarget); |
5429 | 0 | return Res; |
5430 | 0 | } |
5431 | | |
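The scalar branch above relies on a swap trick: RISC-V fmax/fmin (like std::fmax) return the non-NaN operand when exactly one input is NaN, so any input NaN is first funneled into both operands and only then is the ordinary max taken. A standalone double-precision sketch of the same idea (illustrative only, covering just the NaN-propagation property):

#include <cmath>

static double fmaximumFromFmax(double X, double Y) {
  double NewY = (X == X) ? Y : X; // X is NaN -> NewY becomes that NaN
  double NewX = (Y == Y) ? X : Y; // Y is NaN -> NewX becomes that NaN
  // If either input was NaN, both operands are now NaN and fmax returns NaN;
  // otherwise this is simply fmax(X, Y).
  return std::fmax(NewX, NewY);
}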
5432 | | /// Get a RISC-V target specified VL op for a given SDNode. |
5433 | 0 | static unsigned getRISCVVLOp(SDValue Op) { |
5434 | 0 | #define OP_CASE(NODE) \ |
5435 | 0 | case ISD::NODE: \ |
5436 | 0 | return RISCVISD::NODE##_VL; |
5437 | 0 | #define VP_CASE(NODE) \ |
5438 | 0 | case ISD::VP_##NODE: \ |
5439 | 0 | return RISCVISD::NODE##_VL; |
5440 | | // clang-format off |
5441 | 0 | switch (Op.getOpcode()) { |
5442 | 0 | default: |
5443 | 0 | llvm_unreachable("don't have RISC-V specified VL op for this SDNode"); |
5444 | 0 | OP_CASE(ADD) |
5445 | 0 | OP_CASE(SUB) |
5446 | 0 | OP_CASE(MUL) |
5447 | 0 | OP_CASE(MULHS) |
5448 | 0 | OP_CASE(MULHU) |
5449 | 0 | OP_CASE(SDIV) |
5450 | 0 | OP_CASE(SREM) |
5451 | 0 | OP_CASE(UDIV) |
5452 | 0 | OP_CASE(UREM) |
5453 | 0 | OP_CASE(SHL) |
5454 | 0 | OP_CASE(SRA) |
5455 | 0 | OP_CASE(SRL) |
5456 | 0 | OP_CASE(ROTL) |
5457 | 0 | OP_CASE(ROTR) |
5458 | 0 | OP_CASE(BSWAP) |
5459 | 0 | OP_CASE(CTTZ) |
5460 | 0 | OP_CASE(CTLZ) |
5461 | 0 | OP_CASE(CTPOP) |
5462 | 0 | OP_CASE(BITREVERSE) |
5463 | 0 | OP_CASE(SADDSAT) |
5464 | 0 | OP_CASE(UADDSAT) |
5465 | 0 | OP_CASE(SSUBSAT) |
5466 | 0 | OP_CASE(USUBSAT) |
5467 | 0 | OP_CASE(AVGFLOORU) |
5468 | 0 | OP_CASE(AVGCEILU) |
5469 | 0 | OP_CASE(FADD) |
5470 | 0 | OP_CASE(FSUB) |
5471 | 0 | OP_CASE(FMUL) |
5472 | 0 | OP_CASE(FDIV) |
5473 | 0 | OP_CASE(FNEG) |
5474 | 0 | OP_CASE(FABS) |
5475 | 0 | OP_CASE(FSQRT) |
5476 | 0 | OP_CASE(SMIN) |
5477 | 0 | OP_CASE(SMAX) |
5478 | 0 | OP_CASE(UMIN) |
5479 | 0 | OP_CASE(UMAX) |
5480 | 0 | OP_CASE(STRICT_FADD) |
5481 | 0 | OP_CASE(STRICT_FSUB) |
5482 | 0 | OP_CASE(STRICT_FMUL) |
5483 | 0 | OP_CASE(STRICT_FDIV) |
5484 | 0 | OP_CASE(STRICT_FSQRT) |
5485 | | VP_CASE(ADD) // VP_ADD |
5486 | | VP_CASE(SUB) // VP_SUB |
5487 | | VP_CASE(MUL) // VP_MUL |
5488 | | VP_CASE(SDIV) // VP_SDIV |
5489 | | VP_CASE(SREM) // VP_SREM |
5490 | | VP_CASE(UDIV) // VP_UDIV |
5491 | | VP_CASE(UREM) // VP_UREM |
5492 | | VP_CASE(SHL) // VP_SHL |
5493 | | VP_CASE(FADD) // VP_FADD |
5494 | | VP_CASE(FSUB) // VP_FSUB |
5495 | | VP_CASE(FMUL) // VP_FMUL |
5496 | | VP_CASE(FDIV) // VP_FDIV |
5497 | | VP_CASE(FNEG) // VP_FNEG |
5498 | | VP_CASE(FABS) // VP_FABS |
5499 | | VP_CASE(SMIN) // VP_SMIN |
5500 | | VP_CASE(SMAX) // VP_SMAX |
5501 | | VP_CASE(UMIN) // VP_UMIN |
5502 | | VP_CASE(UMAX) // VP_UMAX |
5503 | | VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN |
5504 | | VP_CASE(SETCC) // VP_SETCC |
5505 | | VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP |
5506 | | VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP |
5507 | | VP_CASE(BITREVERSE) // VP_BITREVERSE |
5508 | | VP_CASE(BSWAP) // VP_BSWAP |
5509 | | VP_CASE(CTLZ) // VP_CTLZ |
5510 | | VP_CASE(CTTZ) // VP_CTTZ |
5511 | | VP_CASE(CTPOP) // VP_CTPOP |
5512 | 0 | case ISD::CTLZ_ZERO_UNDEF: |
5513 | 0 | case ISD::VP_CTLZ_ZERO_UNDEF: |
5514 | 0 | return RISCVISD::CTLZ_VL; |
5515 | 0 | case ISD::CTTZ_ZERO_UNDEF: |
5516 | 0 | case ISD::VP_CTTZ_ZERO_UNDEF: |
5517 | 0 | return RISCVISD::CTTZ_VL; |
5518 | 0 | case ISD::FMA: |
5519 | 0 | case ISD::VP_FMA: |
5520 | 0 | return RISCVISD::VFMADD_VL; |
5521 | 0 | case ISD::STRICT_FMA: |
5522 | 0 | return RISCVISD::STRICT_VFMADD_VL; |
5523 | 0 | case ISD::AND: |
5524 | 0 | case ISD::VP_AND: |
5525 | 0 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
5526 | 0 | return RISCVISD::VMAND_VL; |
5527 | 0 | return RISCVISD::AND_VL; |
5528 | 0 | case ISD::OR: |
5529 | 0 | case ISD::VP_OR: |
5530 | 0 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
5531 | 0 | return RISCVISD::VMOR_VL; |
5532 | 0 | return RISCVISD::OR_VL; |
5533 | 0 | case ISD::XOR: |
5534 | 0 | case ISD::VP_XOR: |
5535 | 0 | if (Op.getSimpleValueType().getVectorElementType() == MVT::i1) |
5536 | 0 | return RISCVISD::VMXOR_VL; |
5537 | 0 | return RISCVISD::XOR_VL; |
5538 | 0 | case ISD::VP_SELECT: |
5539 | 0 | case ISD::VP_MERGE: |
5540 | 0 | return RISCVISD::VMERGE_VL; |
5541 | 0 | case ISD::VP_ASHR: |
5542 | 0 | return RISCVISD::SRA_VL; |
5543 | 0 | case ISD::VP_LSHR: |
5544 | 0 | return RISCVISD::SRL_VL; |
5545 | 0 | case ISD::VP_SQRT: |
5546 | 0 | return RISCVISD::FSQRT_VL; |
5547 | 0 | case ISD::VP_SIGN_EXTEND: |
5548 | 0 | return RISCVISD::VSEXT_VL; |
5549 | 0 | case ISD::VP_ZERO_EXTEND: |
5550 | 0 | return RISCVISD::VZEXT_VL; |
5551 | 0 | case ISD::VP_FP_TO_SINT: |
5552 | 0 | return RISCVISD::VFCVT_RTZ_X_F_VL; |
5553 | 0 | case ISD::VP_FP_TO_UINT: |
5554 | 0 | return RISCVISD::VFCVT_RTZ_XU_F_VL; |
5555 | 0 | case ISD::FMINNUM: |
5556 | 0 | case ISD::VP_FMINNUM: |
5557 | 0 | return RISCVISD::VFMIN_VL; |
5558 | 0 | case ISD::FMAXNUM: |
5559 | 0 | case ISD::VP_FMAXNUM: |
5560 | 0 | return RISCVISD::VFMAX_VL; |
5561 | 0 | } |
5562 | | // clang-format on |
5563 | 0 | #undef OP_CASE |
5564 | 0 | #undef VP_CASE |
5565 | 0 | } |
5566 | | |
5567 | | /// Return true if a RISC-V target specified op has a merge operand. |
5568 | 0 | static bool hasMergeOp(unsigned Opcode) { |
5569 | 0 | assert(Opcode > RISCVISD::FIRST_NUMBER && |
5570 | 0 | Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && |
5571 | 0 | "not a RISC-V target specific op"); |
5572 | 0 | static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == |
5573 | 0 | 126 && |
5574 | 0 | RISCVISD::LAST_RISCV_STRICTFP_OPCODE - |
5575 | 0 | ISD::FIRST_TARGET_STRICTFP_OPCODE == |
5576 | 0 | 21 && |
5577 | 0 | "adding target specific op should update this function"); |
5578 | 0 | if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) |
5579 | 0 | return true; |
5580 | 0 | if (Opcode == RISCVISD::FCOPYSIGN_VL) |
5581 | 0 | return true; |
5582 | 0 | if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL) |
5583 | 0 | return true; |
5584 | 0 | if (Opcode == RISCVISD::SETCC_VL) |
5585 | 0 | return true; |
5586 | 0 | if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL) |
5587 | 0 | return true; |
5588 | 0 | if (Opcode == RISCVISD::VMERGE_VL) |
5589 | 0 | return true; |
5590 | 0 | return false; |
5591 | 0 | } |
5592 | | |
5593 | | /// Return true if a RISC-V target specified op has a mask operand. |
5594 | 0 | static bool hasMaskOp(unsigned Opcode) { |
5595 | 0 | assert(Opcode > RISCVISD::FIRST_NUMBER && |
5596 | 0 | Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && |
5597 | 0 | "not a RISC-V target specific op"); |
5598 | 0 | static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == |
5599 | 0 | 126 && |
5600 | 0 | RISCVISD::LAST_RISCV_STRICTFP_OPCODE - |
5601 | 0 | ISD::FIRST_TARGET_STRICTFP_OPCODE == |
5602 | 0 | 21 && |
5603 | 0 | "adding target specific op should update this function"); |
5604 | 0 | if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) |
5605 | 0 | return true; |
5606 | 0 | if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL) |
5607 | 0 | return true; |
5608 | 0 | if (Opcode >= RISCVISD::STRICT_FADD_VL && |
5609 | 0 | Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL) |
5610 | 0 | return true; |
5611 | 0 | return false; |
5612 | 0 | } |
5613 | | |
5614 | 0 | static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) { |
5615 | 0 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); |
5616 | 0 | SDLoc DL(Op); |
5617 | |
|
5618 | 0 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
5619 | 0 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
5620 | |
|
5621 | 0 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
5622 | 0 | if (!Op.getOperand(j).getValueType().isVector()) { |
5623 | 0 | LoOperands[j] = Op.getOperand(j); |
5624 | 0 | HiOperands[j] = Op.getOperand(j); |
5625 | 0 | continue; |
5626 | 0 | } |
5627 | 0 | std::tie(LoOperands[j], HiOperands[j]) = |
5628 | 0 | DAG.SplitVector(Op.getOperand(j), DL); |
5629 | 0 | } |
5630 | |
5631 | 0 | SDValue LoRes = |
5632 | 0 | DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); |
5633 | 0 | SDValue HiRes = |
5634 | 0 | DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); |
5635 | |
5636 | 0 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); |
5637 | 0 | } |
5638 | | |
5639 | 0 | static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) { |
5640 | 0 | assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op"); |
5641 | 0 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType()); |
5642 | 0 | SDLoc DL(Op); |
5643 | |
5644 | 0 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
5645 | 0 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
5646 | |
5647 | 0 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
5648 | 0 | if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) { |
5649 | 0 | std::tie(LoOperands[j], HiOperands[j]) = |
5650 | 0 | DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL); |
5651 | 0 | continue; |
5652 | 0 | } |
5653 | 0 | if (!Op.getOperand(j).getValueType().isVector()) { |
5654 | 0 | LoOperands[j] = Op.getOperand(j); |
5655 | 0 | HiOperands[j] = Op.getOperand(j); |
5656 | 0 | continue; |
5657 | 0 | } |
5658 | 0 | std::tie(LoOperands[j], HiOperands[j]) = |
5659 | 0 | DAG.SplitVector(Op.getOperand(j), DL); |
5660 | 0 | } |
5661 | |
5662 | 0 | SDValue LoRes = |
5663 | 0 | DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags()); |
5664 | 0 | SDValue HiRes = |
5665 | 0 | DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags()); |
5666 | |
5667 | 0 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes); |
5668 | 0 | } |
5669 | | |
5670 | 0 | static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) { |
5671 | 0 | SDLoc DL(Op); |
5672 | |
5673 | 0 | auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL); |
5674 | 0 | auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL); |
5675 | 0 | auto [EVLLo, EVLHi] = |
5676 | 0 | DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL); |
5677 | |
5678 | 0 | SDValue ResLo = |
5679 | 0 | DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), |
5680 | 0 | {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags()); |
5681 | 0 | return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), |
5682 | 0 | {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags()); |
5683 | 0 | } |
5684 | | |
5685 | 0 | static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) { |
5686 | |
5687 | 0 | assert(Op->isStrictFPOpcode()); |
5688 | | |
5689 | 0 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0)); |
5690 | |
5691 | 0 | SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1)); |
5692 | 0 | SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1)); |
5693 | |
5694 | 0 | SDLoc DL(Op); |
5695 | |
5696 | 0 | SmallVector<SDValue, 4> LoOperands(Op.getNumOperands()); |
5697 | 0 | SmallVector<SDValue, 4> HiOperands(Op.getNumOperands()); |
5698 | |
5699 | 0 | for (unsigned j = 0; j != Op.getNumOperands(); ++j) { |
5700 | 0 | if (!Op.getOperand(j).getValueType().isVector()) { |
5701 | 0 | LoOperands[j] = Op.getOperand(j); |
5702 | 0 | HiOperands[j] = Op.getOperand(j); |
5703 | 0 | continue; |
5704 | 0 | } |
5705 | 0 | std::tie(LoOperands[j], HiOperands[j]) = |
5706 | 0 | DAG.SplitVector(Op.getOperand(j), DL); |
5707 | 0 | } |
5708 | |
5709 | 0 | SDValue LoRes = |
5710 | 0 | DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags()); |
5711 | 0 | HiOperands[0] = LoRes.getValue(1); |
5712 | 0 | SDValue HiRes = |
5713 | 0 | DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags()); |
5714 | |
5715 | 0 | SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0), |
5716 | 0 | LoRes.getValue(0), HiRes.getValue(0)); |
5717 | 0 | return DAG.getMergeValues({V, HiRes.getValue(1)}, DL); |
5718 | 0 | } |
5719 | | |
5720 | | SDValue RISCVTargetLowering::LowerOperation(SDValue Op, |
5721 | 219k | SelectionDAG &DAG) const { |
5722 | 219k | switch (Op.getOpcode()) { |
5723 | 0 | default: |
5724 | 0 | report_fatal_error("unimplemented operand"); |
5725 | 0 | case ISD::ATOMIC_FENCE: |
5726 | 0 | return LowerATOMIC_FENCE(Op, DAG, Subtarget); |
5727 | 62.6k | case ISD::GlobalAddress: |
5728 | 62.6k | return lowerGlobalAddress(Op, DAG); |
5729 | 0 | case ISD::BlockAddress: |
5730 | 0 | return lowerBlockAddress(Op, DAG); |
5731 | 3 | case ISD::ConstantPool: |
5732 | 3 | return lowerConstantPool(Op, DAG); |
5733 | 0 | case ISD::JumpTable: |
5734 | 0 | return lowerJumpTable(Op, DAG); |
5735 | 0 | case ISD::GlobalTLSAddress: |
5736 | 0 | return lowerGlobalTLSAddress(Op, DAG); |
5737 | 141k | case ISD::Constant: |
5738 | 141k | return lowerConstant(Op, DAG, Subtarget); |
5739 | 663 | case ISD::SELECT: |
5740 | 663 | return lowerSELECT(Op, DAG); |
5741 | 2.98k | case ISD::BRCOND: |
5742 | 2.98k | return lowerBRCOND(Op, DAG); |
5743 | 0 | case ISD::VASTART: |
5744 | 0 | return lowerVASTART(Op, DAG); |
5745 | 0 | case ISD::FRAMEADDR: |
5746 | 0 | return lowerFRAMEADDR(Op, DAG); |
5747 | 0 | case ISD::RETURNADDR: |
5748 | 0 | return lowerRETURNADDR(Op, DAG); |
5749 | 0 | case ISD::SHL_PARTS: |
5750 | 0 | return lowerShiftLeftParts(Op, DAG); |
5751 | 0 | case ISD::SRA_PARTS: |
5752 | 0 | return lowerShiftRightParts(Op, DAG, true); |
5753 | 0 | case ISD::SRL_PARTS: |
5754 | 0 | return lowerShiftRightParts(Op, DAG, false); |
5755 | 0 | case ISD::ROTL: |
5756 | 0 | case ISD::ROTR: |
5757 | 0 | if (Op.getValueType().isFixedLengthVector()) { |
5758 | 0 | assert(Subtarget.hasStdExtZvkb()); |
5759 | 0 | return lowerToScalableOp(Op, DAG); |
5760 | 0 | } |
5761 | 0 | assert(Subtarget.hasVendorXTHeadBb() && |
5762 | 0 | !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) && |
5763 | 0 | "Unexpected custom legalization"); |
5764 | | // XTHeadBb only supports rotate by constant. |
5765 | 0 | if (!isa<ConstantSDNode>(Op.getOperand(1))) |
5766 | 0 | return SDValue(); |
5767 | 0 | return Op; |
5768 | 0 | case ISD::BITCAST: { |
5769 | 0 | SDLoc DL(Op); |
5770 | 0 | EVT VT = Op.getValueType(); |
5771 | 0 | SDValue Op0 = Op.getOperand(0); |
5772 | 0 | EVT Op0VT = Op0.getValueType(); |
5773 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
5774 | 0 | if (VT == MVT::f16 && Op0VT == MVT::i16 && |
5775 | 0 | Subtarget.hasStdExtZfhminOrZhinxmin()) { |
5776 | 0 | SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); |
5777 | 0 | SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); |
5778 | 0 | return FPConv; |
5779 | 0 | } |
5780 | 0 | if (VT == MVT::bf16 && Op0VT == MVT::i16 && |
5781 | 0 | Subtarget.hasStdExtZfbfmin()) { |
5782 | 0 | SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); |
5783 | 0 | SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0); |
5784 | 0 | return FPConv; |
5785 | 0 | } |
5786 | 0 | if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && |
5787 | 0 | Subtarget.hasStdExtFOrZfinx()) { |
5788 | 0 | SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); |
5789 | 0 | SDValue FPConv = |
5790 | 0 | DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); |
5791 | 0 | return FPConv; |
5792 | 0 | } |
5793 | 0 | if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 && |
5794 | 0 | Subtarget.hasStdExtZfa()) { |
5795 | 0 | SDValue Lo, Hi; |
5796 | 0 | std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32); |
5797 | 0 | SDValue RetReg = |
5798 | 0 | DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); |
5799 | 0 | return RetReg; |
5800 | 0 | } |
5801 | | |
5802 | | // Consider other scalar<->scalar casts as legal if the types are legal. |
5803 | | // Otherwise expand them. |
5804 | 0 | if (!VT.isVector() && !Op0VT.isVector()) { |
5805 | 0 | if (isTypeLegal(VT) && isTypeLegal(Op0VT)) |
5806 | 0 | return Op; |
5807 | 0 | return SDValue(); |
5808 | 0 | } |
5809 | | |
5810 | 0 | assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && |
5811 | 0 | "Unexpected types"); |
5812 | | |
5813 | 0 | if (VT.isFixedLengthVector()) { |
5814 | | // We can handle fixed length vector bitcasts with a simple replacement |
5815 | | // in isel. |
5816 | 0 | if (Op0VT.isFixedLengthVector()) |
5817 | 0 | return Op; |
5818 | | // When bitcasting from scalar to fixed-length vector, insert the scalar |
5819 | | // into a one-element vector of the result type, and perform a vector |
5820 | | // bitcast. |
5821 | 0 | if (!Op0VT.isVector()) { |
5822 | 0 | EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); |
5823 | 0 | if (!isTypeLegal(BVT)) |
5824 | 0 | return SDValue(); |
5825 | 0 | return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, |
5826 | 0 | DAG.getUNDEF(BVT), Op0, |
5827 | 0 | DAG.getConstant(0, DL, XLenVT))); |
5828 | 0 | } |
5829 | 0 | return SDValue(); |
5830 | 0 | } |
5831 | | // Custom-legalize bitcasts from fixed-length vector types to scalar types |
5832 | | // thus: bitcast the vector to a one-element vector type whose element type |
5833 | | // is the same as the result type, and extract the first element. |
5834 | 0 | if (!VT.isVector() && Op0VT.isFixedLengthVector()) { |
5835 | 0 | EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); |
5836 | 0 | if (!isTypeLegal(BVT)) |
5837 | 0 | return SDValue(); |
5838 | 0 | SDValue BVec = DAG.getBitcast(BVT, Op0); |
5839 | 0 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, |
5840 | 0 | DAG.getConstant(0, DL, XLenVT)); |
5841 | 0 | } |
5842 | 0 | return SDValue(); |
5843 | 0 | } |
5844 | 0 | case ISD::INTRINSIC_WO_CHAIN: |
5845 | 0 | return LowerINTRINSIC_WO_CHAIN(Op, DAG); |
5846 | 0 | case ISD::INTRINSIC_W_CHAIN: |
5847 | 0 | return LowerINTRINSIC_W_CHAIN(Op, DAG); |
5848 | 0 | case ISD::INTRINSIC_VOID: |
5849 | 0 | return LowerINTRINSIC_VOID(Op, DAG); |
5850 | 0 | case ISD::IS_FPCLASS: |
5851 | 0 | return LowerIS_FPCLASS(Op, DAG); |
5852 | 0 | case ISD::BITREVERSE: { |
5853 | 0 | MVT VT = Op.getSimpleValueType(); |
5854 | 0 | if (VT.isFixedLengthVector()) { |
5855 | 0 | assert(Subtarget.hasStdExtZvbb()); |
5856 | 0 | return lowerToScalableOp(Op, DAG); |
5857 | 0 | } |
5858 | 0 | SDLoc DL(Op); |
5859 | 0 | assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); |
5860 | 0 | assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); |
5861 | | // Expand bitreverse to a bswap(rev8) followed by brev8. |
5862 | 0 | SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); |
5863 | 0 | return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap); |
5864 | 0 | } |
5865 | 0 | case ISD::TRUNCATE: |
5866 | | // Only custom-lower vector truncates |
5867 | 0 | if (!Op.getSimpleValueType().isVector()) |
5868 | 0 | return Op; |
5869 | 0 | return lowerVectorTruncLike(Op, DAG); |
5870 | 0 | case ISD::ANY_EXTEND: |
5871 | 0 | case ISD::ZERO_EXTEND: |
5872 | 0 | if (Op.getOperand(0).getValueType().isVector() && |
5873 | 0 | Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
5874 | 0 | return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); |
5875 | 0 | return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); |
5876 | 0 | case ISD::SIGN_EXTEND: |
5877 | 0 | if (Op.getOperand(0).getValueType().isVector() && |
5878 | 0 | Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
5879 | 0 | return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); |
5880 | 0 | return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); |
5881 | 0 | case ISD::SPLAT_VECTOR_PARTS: |
5882 | 0 | return lowerSPLAT_VECTOR_PARTS(Op, DAG); |
5883 | 0 | case ISD::INSERT_VECTOR_ELT: |
5884 | 0 | return lowerINSERT_VECTOR_ELT(Op, DAG); |
5885 | 0 | case ISD::EXTRACT_VECTOR_ELT: |
5886 | 0 | return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
5887 | 0 | case ISD::SCALAR_TO_VECTOR: { |
5888 | 0 | MVT VT = Op.getSimpleValueType(); |
5889 | 0 | SDLoc DL(Op); |
5890 | 0 | SDValue Scalar = Op.getOperand(0); |
5891 | 0 | if (VT.getVectorElementType() == MVT::i1) { |
5892 | 0 | MVT WideVT = VT.changeVectorElementType(MVT::i8); |
5893 | 0 | SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar); |
5894 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, VT, V); |
5895 | 0 | } |
5896 | 0 | MVT ContainerVT = VT; |
5897 | 0 | if (VT.isFixedLengthVector()) |
5898 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
5899 | 0 | SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; |
5900 | 0 | Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar); |
5901 | 0 | SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT, |
5902 | 0 | DAG.getUNDEF(ContainerVT), Scalar, VL); |
5903 | 0 | if (VT.isFixedLengthVector()) |
5904 | 0 | V = convertFromScalableVector(VT, V, DAG, Subtarget); |
5905 | 0 | return V; |
5906 | 0 | } |
5907 | 0 | case ISD::VSCALE: { |
5908 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
5909 | 0 | MVT VT = Op.getSimpleValueType(); |
5910 | 0 | SDLoc DL(Op); |
5911 | 0 | SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT); |
5912 | | // We define our scalable vector types for lmul=1 to use a 64 bit known |
5913 | | // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate |
5914 | | // vscale as VLENB / 8. |
5915 | 0 | static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); |
5916 | 0 | if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) |
5917 | 0 | report_fatal_error("Support for VLEN==32 is incomplete."); |
5918 | | // We assume VLENB is a multiple of 8. We manually choose the best shift |
5919 | | // here because SimplifyDemandedBits isn't always able to simplify it. |
5920 | 0 | uint64_t Val = Op.getConstantOperandVal(0); |
5921 | 0 | if (isPowerOf2_64(Val)) { |
5922 | 0 | uint64_t Log2 = Log2_64(Val); |
5923 | 0 | if (Log2 < 3) |
5924 | 0 | Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res, |
5925 | 0 | DAG.getConstant(3 - Log2, DL, VT)); |
5926 | 0 | else if (Log2 > 3) |
5927 | 0 | Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res, |
5928 | 0 | DAG.getConstant(Log2 - 3, DL, XLenVT)); |
5929 | 0 | } else if ((Val % 8) == 0) { |
5930 | | // If the multiplier is a multiple of 8, scale it down to avoid needing |
5931 | | // to shift the VLENB value. |
5932 | 0 | Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res, |
5933 | 0 | DAG.getConstant(Val / 8, DL, XLenVT)); |
5934 | 0 | } else { |
5935 | 0 | SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res, |
5936 | 0 | DAG.getConstant(3, DL, XLenVT)); |
5937 | 0 | Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale, |
5938 | 0 | DAG.getConstant(Val, DL, XLenVT)); |
5939 | 0 | } |
5940 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); |
5941 | 0 | } |
5942 | 0 | case ISD::FPOWI: { |
5943 | | // Custom promote f16 powi with illegal i32 integer type on RV64. Once |
5944 | | // promoted this will be legalized into a libcall by LegalizeIntegerTypes. |
5945 | 0 | if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && |
5946 | 0 | Op.getOperand(1).getValueType() == MVT::i32) { |
5947 | 0 | SDLoc DL(Op); |
5948 | 0 | SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); |
5949 | 0 | SDValue Powi = |
5950 | 0 | DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); |
5951 | 0 | return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, |
5952 | 0 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); |
5953 | 0 | } |
5954 | 0 | return SDValue(); |
5955 | 0 | } |
5956 | 0 | case ISD::FMAXIMUM: |
5957 | 0 | case ISD::FMINIMUM: |
5958 | 0 | if (Op.getValueType() == MVT::nxv32f16 && |
5959 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
5960 | 0 | !Subtarget.hasVInstructionsF16())) |
5961 | 0 | return SplitVectorOp(Op, DAG); |
5962 | 0 | return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); |
5963 | 0 | case ISD::FP_EXTEND: { |
5964 | 0 | SDLoc DL(Op); |
5965 | 0 | EVT VT = Op.getValueType(); |
5966 | 0 | SDValue Op0 = Op.getOperand(0); |
5967 | 0 | EVT Op0VT = Op0.getValueType(); |
5968 | 0 | if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) |
5969 | 0 | return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); |
5970 | 0 | if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) { |
5971 | 0 | SDValue FloatVal = |
5972 | 0 | DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0); |
5973 | 0 | return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal); |
5974 | 0 | } |
5975 | | |
5976 | 0 | if (!Op.getValueType().isVector()) |
5977 | 0 | return Op; |
5978 | 0 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
5979 | 0 | } |
5980 | 0 | case ISD::FP_ROUND: { |
5981 | 0 | SDLoc DL(Op); |
5982 | 0 | EVT VT = Op.getValueType(); |
5983 | 0 | SDValue Op0 = Op.getOperand(0); |
5984 | 0 | EVT Op0VT = Op0.getValueType(); |
5985 | 0 | if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin()) |
5986 | 0 | return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0); |
5987 | 0 | if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() && |
5988 | 0 | Subtarget.hasStdExtDOrZdinx()) { |
5989 | 0 | SDValue FloatVal = |
5990 | 0 | DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0, |
5991 | 0 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); |
5992 | 0 | return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal); |
5993 | 0 | } |
5994 | | |
5995 | 0 | if (!Op.getValueType().isVector()) |
5996 | 0 | return Op; |
5997 | 0 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
5998 | 0 | } |
5999 | 0 | case ISD::STRICT_FP_ROUND: |
6000 | 0 | case ISD::STRICT_FP_EXTEND: |
6001 | 0 | return lowerStrictFPExtendOrRoundLike(Op, DAG); |
6002 | 0 | case ISD::SINT_TO_FP: |
6003 | 0 | case ISD::UINT_TO_FP: |
6004 | 0 | if (Op.getValueType().isVector() && |
6005 | 0 | Op.getValueType().getScalarType() == MVT::f16 && |
6006 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6007 | 0 | !Subtarget.hasVInstructionsF16())) { |
6008 | 0 | if (Op.getValueType() == MVT::nxv32f16) |
6009 | 0 | return SplitVectorOp(Op, DAG); |
6010 | | // int -> f32 |
6011 | 0 | SDLoc DL(Op); |
6012 | 0 | MVT NVT = |
6013 | 0 | MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); |
6014 | 0 | SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); |
6015 | | // f32 -> f16 |
6016 | 0 | return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, |
6017 | 0 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); |
6018 | 0 | } |
6019 | 0 | [[fallthrough]]; |
6020 | 0 | case ISD::FP_TO_SINT: |
6021 | 0 | case ISD::FP_TO_UINT: |
6022 | 0 | if (SDValue Op1 = Op.getOperand(0); |
6023 | 0 | Op1.getValueType().isVector() && |
6024 | 0 | Op1.getValueType().getScalarType() == MVT::f16 && |
6025 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6026 | 0 | !Subtarget.hasVInstructionsF16())) { |
6027 | 0 | if (Op1.getValueType() == MVT::nxv32f16) |
6028 | 0 | return SplitVectorOp(Op, DAG); |
6029 | | // f16 -> f32 |
6030 | 0 | SDLoc DL(Op); |
6031 | 0 | MVT NVT = MVT::getVectorVT(MVT::f32, |
6032 | 0 | Op1.getValueType().getVectorElementCount()); |
6033 | 0 | SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); |
6034 | | // f32 -> int |
6035 | 0 | return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec); |
6036 | 0 | } |
6037 | 0 | [[fallthrough]]; |
6038 | 0 | case ISD::STRICT_FP_TO_SINT: |
6039 | 0 | case ISD::STRICT_FP_TO_UINT: |
6040 | 0 | case ISD::STRICT_SINT_TO_FP: |
6041 | 0 | case ISD::STRICT_UINT_TO_FP: { |
6042 | | // RVV can only do fp<->int conversions to types half or double the size of
6043 | | // the source. We custom-lower any conversion that would need two hops into a
6044 | | // sequence of two single-hop operations.
6045 | 0 | MVT VT = Op.getSimpleValueType(); |
6046 | 0 | if (!VT.isVector()) |
6047 | 0 | return Op; |
6048 | 0 | SDLoc DL(Op); |
6049 | 0 | bool IsStrict = Op->isStrictFPOpcode(); |
6050 | 0 | SDValue Src = Op.getOperand(0 + IsStrict); |
6051 | 0 | MVT EltVT = VT.getVectorElementType(); |
6052 | 0 | MVT SrcVT = Src.getSimpleValueType(); |
6053 | 0 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
6054 | 0 | unsigned EltSize = EltVT.getSizeInBits(); |
6055 | 0 | unsigned SrcEltSize = SrcEltVT.getSizeInBits(); |
6056 | 0 | assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && |
6057 | 0 | "Unexpected vector element types"); |
6058 | | |
6059 | 0 | bool IsInt2FP = SrcEltVT.isInteger(); |
6060 | | // Widening conversions |
6061 | 0 | if (EltSize > (2 * SrcEltSize)) { |
6062 | 0 | if (IsInt2FP) { |
6063 | | // Do a regular integer sign/zero extension then convert to float. |
6064 | 0 | MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2), |
6065 | 0 | VT.getVectorElementCount()); |
6066 | 0 | unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP || |
6067 | 0 | Op.getOpcode() == ISD::STRICT_UINT_TO_FP) |
6068 | 0 | ? ISD::ZERO_EXTEND |
6069 | 0 | : ISD::SIGN_EXTEND; |
6070 | 0 | SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); |
6071 | 0 | if (IsStrict) |
6072 | 0 | return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), |
6073 | 0 | Op.getOperand(0), Ext); |
6074 | 0 | return DAG.getNode(Op.getOpcode(), DL, VT, Ext); |
6075 | 0 | } |
6076 | | // FP2Int |
6077 | 0 | assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); |
6078 | | // Do one doubling fp_extend then complete the operation by converting |
6079 | | // to int. |
6080 | 0 | MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
6081 | 0 | if (IsStrict) { |
6082 | 0 | auto [FExt, Chain] = |
6083 | 0 | DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT); |
6084 | 0 | return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt); |
6085 | 0 | } |
6086 | 0 | SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); |
6087 | 0 | return DAG.getNode(Op.getOpcode(), DL, VT, FExt); |
6088 | 0 | } |
6089 | | |
6090 | | // Narrowing conversions |
6091 | 0 | if (SrcEltSize > (2 * EltSize)) { |
6092 | 0 | if (IsInt2FP) { |
6093 | | // One narrowing int_to_fp, then an fp_round. |
6094 | 0 | assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); |
6095 | 0 | MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); |
6096 | 0 | if (IsStrict) { |
6097 | 0 | SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, |
6098 | 0 | DAG.getVTList(InterimFVT, MVT::Other), |
6099 | 0 | Op.getOperand(0), Src); |
6100 | 0 | SDValue Chain = Int2FP.getValue(1); |
6101 | 0 | return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first; |
6102 | 0 | } |
6103 | 0 | SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); |
6104 | 0 | return DAG.getFPExtendOrRound(Int2FP, DL, VT); |
6105 | 0 | } |
6106 | | // FP2Int |
6107 | | // One narrowing fp_to_int, then truncate the integer. If the float isn't |
6108 | | // representable by the integer, the result is poison. |
6109 | 0 | MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), |
6110 | 0 | VT.getVectorElementCount()); |
6111 | 0 | if (IsStrict) { |
6112 | 0 | SDValue FP2Int = |
6113 | 0 | DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other), |
6114 | 0 | Op.getOperand(0), Src); |
6115 | 0 | SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); |
6116 | 0 | return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL); |
6117 | 0 | } |
6118 | 0 | SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); |
6119 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); |
6120 | 0 | } |
6121 | | |
6122 | | // Scalable vectors can exit here. Patterns will handle equally-sized |
6123 | | // conversions as well as halving/doubling ones.
6124 | 0 | if (!VT.isFixedLengthVector()) |
6125 | 0 | return Op; |
6126 | | |
6127 | | // For fixed-length vectors we lower to a custom "VL" node. |
6128 | 0 | unsigned RVVOpc = 0; |
6129 | 0 | switch (Op.getOpcode()) { |
6130 | 0 | default: |
6131 | 0 | llvm_unreachable("Impossible opcode"); |
6132 | 0 | case ISD::FP_TO_SINT: |
6133 | 0 | RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL; |
6134 | 0 | break; |
6135 | 0 | case ISD::FP_TO_UINT: |
6136 | 0 | RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL; |
6137 | 0 | break; |
6138 | 0 | case ISD::SINT_TO_FP: |
6139 | 0 | RVVOpc = RISCVISD::SINT_TO_FP_VL; |
6140 | 0 | break; |
6141 | 0 | case ISD::UINT_TO_FP: |
6142 | 0 | RVVOpc = RISCVISD::UINT_TO_FP_VL; |
6143 | 0 | break; |
6144 | 0 | case ISD::STRICT_FP_TO_SINT: |
6145 | 0 | RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL; |
6146 | 0 | break; |
6147 | 0 | case ISD::STRICT_FP_TO_UINT: |
6148 | 0 | RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL; |
6149 | 0 | break; |
6150 | 0 | case ISD::STRICT_SINT_TO_FP: |
6151 | 0 | RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL; |
6152 | 0 | break; |
6153 | 0 | case ISD::STRICT_UINT_TO_FP: |
6154 | 0 | RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL; |
6155 | 0 | break; |
6156 | 0 | } |
6157 | | |
6158 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
6159 | 0 | MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); |
6160 | 0 | assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() && |
6161 | 0 | "Expected same element count"); |
6162 | | |
6163 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
6164 | |
6165 | 0 | Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); |
6166 | 0 | if (IsStrict) { |
6167 | 0 | Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), |
6168 | 0 | Op.getOperand(0), Src, Mask, VL); |
6169 | 0 | SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget); |
6170 | 0 | return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL); |
6171 | 0 | } |
6172 | 0 | Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); |
6173 | 0 | return convertFromScalableVector(VT, Src, DAG, Subtarget); |
6174 | 0 | } |
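The widening/narrowing comments above describe a two-hop strategy. As a scalar illustration (not the RVV lowering itself), the narrowing FP2Int path for a source element more than twice as wide as the destination can be modeled as one halving fp-to-int conversion followed by an integer truncate; narrowFPToInt below is a hypothetical helper and assumes the value is representable in the destination type.

#include <cstdint>

// Scalar model of the narrowing FP2Int path: f64 -> i16 is done as a halving
// f64 -> i32 conversion followed by an i32 -> i16 truncate. Only valid when X
// is representable in the destination type (otherwise the DAG result is poison).
static int16_t narrowFPToInt(double X) {
  int32_t Half = static_cast<int32_t>(X); // fp_to_sint at half the source width
  return static_cast<int16_t>(Half);      // the ISD::TRUNCATE step
}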
6175 | 0 | case ISD::FP_TO_SINT_SAT: |
6176 | 0 | case ISD::FP_TO_UINT_SAT: |
6177 | 0 | return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); |
6178 | 0 | case ISD::FP_TO_BF16: { |
6179 | | // Custom lower to ensure the libcall return is passed in an FPR on hard |
6180 | | // float ABIs. |
6181 | 0 | assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization"); |
6182 | 0 | SDLoc DL(Op); |
6183 | 0 | MakeLibCallOptions CallOptions; |
6184 | 0 | RTLIB::Libcall LC = |
6185 | 0 | RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); |
6186 | 0 | SDValue Res = |
6187 | 0 | makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; |
6188 | 0 | if (Subtarget.is64Bit() && !RV64LegalI32) |
6189 | 0 | return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); |
6190 | 0 | return DAG.getBitcast(MVT::i32, Res); |
6191 | 0 | } |
6192 | 0 | case ISD::BF16_TO_FP: { |
6193 | 0 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization"); |
6194 | 0 | MVT VT = Op.getSimpleValueType(); |
6195 | 0 | SDLoc DL(Op); |
6196 | 0 | Op = DAG.getNode( |
6197 | 0 | ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0), |
6198 | 0 | DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL)); |
6199 | 0 | SDValue Res = Subtarget.is64Bit() |
6200 | 0 | ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op) |
6201 | 0 | : DAG.getBitcast(MVT::f32, Op); |
6202 | | // fp_extend if the target VT is bigger than f32. |
6203 | 0 | if (VT != MVT::f32) |
6204 | 0 | return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res); |
6205 | 0 | return Res; |
6206 | 0 | } |
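The BF16_TO_FP case works because a bfloat16 value occupies the upper 16 bits of the equivalent IEEE f32 encoding, so the shift-by-16 plus bitcast is exact. A minimal scalar sketch (hypothetical helper, not part of this file):

#include <cstdint>
#include <cstring>

// bf16 -> f32 by placing the 16 raw bits in the high half of an f32 bit
// pattern, mirroring the SHL-by-16 + FMV_W_X/bitcast sequence above.
static float bf16ToFloat(uint16_t Bits) {
  uint32_t F32Bits = static_cast<uint32_t>(Bits) << 16;
  float F;
  std::memcpy(&F, &F32Bits, sizeof(F));
  return F;
}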
6207 | 0 | case ISD::FP_TO_FP16: { |
6208 | | // Custom lower to ensure the libcall return is passed in an FPR on hard |
6209 | | // float ABIs. |
6210 | 0 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); |
6211 | 0 | SDLoc DL(Op); |
6212 | 0 | MakeLibCallOptions CallOptions; |
6213 | 0 | RTLIB::Libcall LC = |
6214 | 0 | RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16); |
6215 | 0 | SDValue Res = |
6216 | 0 | makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; |
6217 | 0 | if (Subtarget.is64Bit() && !RV64LegalI32) |
6218 | 0 | return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res); |
6219 | 0 | return DAG.getBitcast(MVT::i32, Res); |
6220 | 0 | } |
6221 | 0 | case ISD::FP16_TO_FP: { |
6222 | | // Custom lower to ensure the libcall argument is passed in an FPR on hard |
6223 | | // float ABIs. |
6224 | 0 | assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation"); |
6225 | 0 | SDLoc DL(Op); |
6226 | 0 | MakeLibCallOptions CallOptions; |
6227 | 0 | SDValue Arg = Subtarget.is64Bit() |
6228 | 0 | ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, |
6229 | 0 | Op.getOperand(0)) |
6230 | 0 | : DAG.getBitcast(MVT::f32, Op.getOperand(0)); |
6231 | 0 | SDValue Res = |
6232 | 0 | makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL) |
6233 | 0 | .first; |
6234 | 0 | return Res; |
6235 | 0 | } |
6236 | 0 | case ISD::FTRUNC: |
6237 | 0 | case ISD::FCEIL: |
6238 | 0 | case ISD::FFLOOR: |
6239 | 0 | case ISD::FNEARBYINT: |
6240 | 0 | case ISD::FRINT: |
6241 | 0 | case ISD::FROUND: |
6242 | 0 | case ISD::FROUNDEVEN: |
6243 | 0 | return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
6244 | 0 | case ISD::LRINT: |
6245 | 0 | case ISD::LLRINT: |
6246 | 0 | return lowerVectorXRINT(Op, DAG, Subtarget); |
6247 | 0 | case ISD::VECREDUCE_ADD: |
6248 | 0 | case ISD::VECREDUCE_UMAX: |
6249 | 0 | case ISD::VECREDUCE_SMAX: |
6250 | 0 | case ISD::VECREDUCE_UMIN: |
6251 | 0 | case ISD::VECREDUCE_SMIN: |
6252 | 0 | return lowerVECREDUCE(Op, DAG); |
6253 | 0 | case ISD::VECREDUCE_AND: |
6254 | 0 | case ISD::VECREDUCE_OR: |
6255 | 0 | case ISD::VECREDUCE_XOR: |
6256 | 0 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
6257 | 0 | return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); |
6258 | 0 | return lowerVECREDUCE(Op, DAG); |
6259 | 0 | case ISD::VECREDUCE_FADD: |
6260 | 0 | case ISD::VECREDUCE_SEQ_FADD: |
6261 | 0 | case ISD::VECREDUCE_FMIN: |
6262 | 0 | case ISD::VECREDUCE_FMAX: |
6263 | 0 | return lowerFPVECREDUCE(Op, DAG); |
6264 | 0 | case ISD::VP_REDUCE_ADD: |
6265 | 0 | case ISD::VP_REDUCE_UMAX: |
6266 | 0 | case ISD::VP_REDUCE_SMAX: |
6267 | 0 | case ISD::VP_REDUCE_UMIN: |
6268 | 0 | case ISD::VP_REDUCE_SMIN: |
6269 | 0 | case ISD::VP_REDUCE_FADD: |
6270 | 0 | case ISD::VP_REDUCE_SEQ_FADD: |
6271 | 0 | case ISD::VP_REDUCE_FMIN: |
6272 | 0 | case ISD::VP_REDUCE_FMAX: |
6273 | 0 | if (Op.getOperand(1).getValueType() == MVT::nxv32f16 && |
6274 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6275 | 0 | !Subtarget.hasVInstructionsF16())) |
6276 | 0 | return SplitVectorReductionOp(Op, DAG); |
6277 | 0 | return lowerVPREDUCE(Op, DAG); |
6278 | 0 | case ISD::VP_REDUCE_AND: |
6279 | 0 | case ISD::VP_REDUCE_OR: |
6280 | 0 | case ISD::VP_REDUCE_XOR: |
6281 | 0 | if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1) |
6282 | 0 | return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); |
6283 | 0 | return lowerVPREDUCE(Op, DAG); |
6284 | 0 | case ISD::UNDEF: { |
6285 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType()); |
6286 | 0 | return convertFromScalableVector(Op.getSimpleValueType(), |
6287 | 0 | DAG.getUNDEF(ContainerVT), DAG, Subtarget); |
6288 | 0 | } |
6289 | 0 | case ISD::INSERT_SUBVECTOR: |
6290 | 0 | return lowerINSERT_SUBVECTOR(Op, DAG); |
6291 | 0 | case ISD::EXTRACT_SUBVECTOR: |
6292 | 0 | return lowerEXTRACT_SUBVECTOR(Op, DAG); |
6293 | 0 | case ISD::VECTOR_DEINTERLEAVE: |
6294 | 0 | return lowerVECTOR_DEINTERLEAVE(Op, DAG); |
6295 | 0 | case ISD::VECTOR_INTERLEAVE: |
6296 | 0 | return lowerVECTOR_INTERLEAVE(Op, DAG); |
6297 | 0 | case ISD::STEP_VECTOR: |
6298 | 0 | return lowerSTEP_VECTOR(Op, DAG); |
6299 | 0 | case ISD::VECTOR_REVERSE: |
6300 | 0 | return lowerVECTOR_REVERSE(Op, DAG); |
6301 | 0 | case ISD::VECTOR_SPLICE: |
6302 | 0 | return lowerVECTOR_SPLICE(Op, DAG); |
6303 | 0 | case ISD::BUILD_VECTOR: |
6304 | 0 | return lowerBUILD_VECTOR(Op, DAG, Subtarget); |
6305 | 0 | case ISD::SPLAT_VECTOR: |
6306 | 0 | if (Op.getValueType().getScalarType() == MVT::f16 && |
6307 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6308 | 0 | !Subtarget.hasVInstructionsF16())) { |
6309 | 0 | if (Op.getValueType() == MVT::nxv32f16) |
6310 | 0 | return SplitVectorOp(Op, DAG); |
6311 | 0 | SDLoc DL(Op); |
6312 | 0 | SDValue NewScalar = |
6313 | 0 | DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); |
6314 | 0 | SDValue NewSplat = DAG.getNode( |
6315 | 0 | ISD::SPLAT_VECTOR, DL, |
6316 | 0 | MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()), |
6317 | 0 | NewScalar); |
6318 | 0 | return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat, |
6319 | 0 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); |
6320 | 0 | } |
6321 | 0 | if (Op.getValueType().getVectorElementType() == MVT::i1) |
6322 | 0 | return lowerVectorMaskSplat(Op, DAG); |
6323 | 0 | return SDValue(); |
6324 | 0 | case ISD::VECTOR_SHUFFLE: |
6325 | 0 | return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); |
6326 | 0 | case ISD::CONCAT_VECTORS: { |
6327 | | // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6328 | | // better than the default expansion, which goes through the stack.
6329 | 0 | SDLoc DL(Op); |
6330 | 0 | MVT VT = Op.getSimpleValueType(); |
6331 | 0 | unsigned NumOpElts = |
6332 | 0 | Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); |
6333 | 0 | SDValue Vec = DAG.getUNDEF(VT); |
6334 | 0 | for (const auto &OpIdx : enumerate(Op->ops())) { |
6335 | 0 | SDValue SubVec = OpIdx.value(); |
6336 | | // Don't insert undef subvectors. |
6337 | 0 | if (SubVec.isUndef()) |
6338 | 0 | continue; |
6339 | 0 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec, |
6340 | 0 | DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); |
6341 | 0 | } |
6342 | 0 | return Vec; |
6343 | 0 | } |
6344 | 0 | case ISD::LOAD: |
6345 | 0 | if (auto V = expandUnalignedRVVLoad(Op, DAG)) |
6346 | 0 | return V; |
6347 | 0 | if (Op.getValueType().isFixedLengthVector()) |
6348 | 0 | return lowerFixedLengthVectorLoadToRVV(Op, DAG); |
6349 | 0 | return Op; |
6350 | 0 | case ISD::STORE: |
6351 | 0 | if (auto V = expandUnalignedRVVStore(Op, DAG)) |
6352 | 0 | return V; |
6353 | 0 | if (Op.getOperand(1).getValueType().isFixedLengthVector()) |
6354 | 0 | return lowerFixedLengthVectorStoreToRVV(Op, DAG); |
6355 | 0 | return Op; |
6356 | 0 | case ISD::MLOAD: |
6357 | 0 | case ISD::VP_LOAD: |
6358 | 0 | return lowerMaskedLoad(Op, DAG); |
6359 | 0 | case ISD::MSTORE: |
6360 | 0 | case ISD::VP_STORE: |
6361 | 0 | return lowerMaskedStore(Op, DAG); |
6362 | 0 | case ISD::SELECT_CC: { |
6363 | | // This occurs because we custom legalize SETGT and SETUGT for setcc. That |
6364 | | // causes LegalizeDAG to think we need to custom legalize select_cc. Expand |
6365 | | // into separate SETCC+SELECT just like LegalizeDAG. |
6366 | 0 | SDValue Tmp1 = Op.getOperand(0); |
6367 | 0 | SDValue Tmp2 = Op.getOperand(1); |
6368 | 0 | SDValue True = Op.getOperand(2); |
6369 | 0 | SDValue False = Op.getOperand(3); |
6370 | 0 | EVT VT = Op.getValueType(); |
6371 | 0 | SDValue CC = Op.getOperand(4); |
6372 | 0 | EVT CmpVT = Tmp1.getValueType(); |
6373 | 0 | EVT CCVT = |
6374 | 0 | getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); |
6375 | 0 | SDLoc DL(Op); |
6376 | 0 | SDValue Cond = |
6377 | 0 | DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags()); |
6378 | 0 | return DAG.getSelect(DL, VT, Cond, True, False); |
6379 | 0 | } |
6380 | 11.9k | case ISD::SETCC: { |
6381 | 11.9k | MVT OpVT = Op.getOperand(0).getSimpleValueType(); |
6382 | 11.9k | if (OpVT.isScalarInteger()) { |
6383 | 11.9k | MVT VT = Op.getSimpleValueType(); |
6384 | 11.9k | SDValue LHS = Op.getOperand(0); |
6385 | 11.9k | SDValue RHS = Op.getOperand(1); |
6386 | 11.9k | ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get(); |
6387 | 11.9k | assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) && |
6388 | 11.9k | "Unexpected CondCode"); |
6389 | | |
6390 | 0 | SDLoc DL(Op); |
6391 | | |
6392 | | // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can |
6393 | | // convert this to the equivalent of (set(u)ge X, C+1) by using |
6394 | | // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant |
6395 | | // in a register. |
6396 | 11.9k | if (isa<ConstantSDNode>(RHS)) { |
6397 | 7.97k | int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue(); |
6398 | 7.97k | if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) { |
6399 | | // If this is an unsigned compare and the constant is -1, incrementing |
6400 | | // the constant would change behavior. The result should be false. |
6401 | 4.30k | if (CCVal == ISD::SETUGT && Imm == -1) |
6402 | 0 | return DAG.getConstant(0, DL, VT); |
6403 | | // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. |
6404 | 4.30k | CCVal = ISD::getSetCCSwappedOperands(CCVal); |
6405 | 4.30k | SDValue SetCC = DAG.getSetCC( |
6406 | 4.30k | DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal); |
6407 | 4.30k | return DAG.getLogicalNOT(DL, SetCC, VT); |
6408 | 4.30k | } |
6409 | 7.97k | } |
6410 | | |
6411 | | // Not a constant we can handle; swap the operands and condition code to
6412 | | // SETLT/SETULT. |
6413 | 7.66k | CCVal = ISD::getSetCCSwappedOperands(CCVal); |
6414 | 7.66k | return DAG.getSetCC(DL, VT, RHS, LHS, CCVal); |
6415 | 11.9k | } |
6416 | | |
6417 | 0 | if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && |
6418 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6419 | 0 | !Subtarget.hasVInstructionsF16())) |
6420 | 0 | return SplitVectorOp(Op, DAG); |
6421 | | |
6422 | 0 | return lowerFixedLengthVectorSetccToRVV(Op, DAG); |
6423 | 0 | } |
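The SETCC constant rewrite relies on the identity (X > C) == !(X < C + 1), which holds whenever C + 1 does not overflow; restricting C so that C + 1 fits in a 12-bit immediate lets the compare become a single slti/sltiu plus xori. A scalar sketch of the signed case (hypothetical helper):

#include <cassert>
#include <cstdint>

// (setgt X, C) lowered as (xori (slti X, C+1), 1): X > C is exactly !(X < C+1).
static bool signedGreaterThan(int64_t X, int64_t C) {
  assert(C != 0 && C >= -2049 && C <= 2046 && "C+1 must fit in a simm12");
  return !(X < C + 1);
}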
6424 | 0 | case ISD::ADD: |
6425 | 0 | case ISD::SUB: |
6426 | 0 | case ISD::MUL: |
6427 | 0 | case ISD::MULHS: |
6428 | 0 | case ISD::MULHU: |
6429 | 0 | case ISD::AND: |
6430 | 0 | case ISD::OR: |
6431 | 0 | case ISD::XOR: |
6432 | 0 | case ISD::SDIV: |
6433 | 0 | case ISD::SREM: |
6434 | 0 | case ISD::UDIV: |
6435 | 0 | case ISD::UREM: |
6436 | 0 | case ISD::BSWAP: |
6437 | 0 | case ISD::CTPOP: |
6438 | 0 | return lowerToScalableOp(Op, DAG); |
6439 | 0 | case ISD::SHL: |
6440 | 0 | case ISD::SRA: |
6441 | 0 | case ISD::SRL: |
6442 | 0 | if (Op.getSimpleValueType().isFixedLengthVector()) |
6443 | 0 | return lowerToScalableOp(Op, DAG); |
6444 | | // This can be called for an i32 shift amount that needs to be promoted. |
6445 | 0 | assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && |
6446 | 0 | "Unexpected custom legalisation"); |
6447 | 0 | return SDValue(); |
6448 | 0 | case ISD::FADD: |
6449 | 0 | case ISD::FSUB: |
6450 | 0 | case ISD::FMUL: |
6451 | 0 | case ISD::FDIV: |
6452 | 0 | case ISD::FNEG: |
6453 | 0 | case ISD::FABS: |
6454 | 0 | case ISD::FSQRT: |
6455 | 0 | case ISD::FMA: |
6456 | 0 | case ISD::FMINNUM: |
6457 | 0 | case ISD::FMAXNUM: |
6458 | 0 | if (Op.getValueType() == MVT::nxv32f16 && |
6459 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6460 | 0 | !Subtarget.hasVInstructionsF16())) |
6461 | 0 | return SplitVectorOp(Op, DAG); |
6462 | 0 | [[fallthrough]]; |
6463 | 0 | case ISD::AVGFLOORU: |
6464 | 0 | case ISD::AVGCEILU: |
6465 | 0 | case ISD::SADDSAT: |
6466 | 0 | case ISD::UADDSAT: |
6467 | 0 | case ISD::SSUBSAT: |
6468 | 0 | case ISD::USUBSAT: |
6469 | 0 | case ISD::SMIN: |
6470 | 0 | case ISD::SMAX: |
6471 | 0 | case ISD::UMIN: |
6472 | 0 | case ISD::UMAX: |
6473 | 0 | return lowerToScalableOp(Op, DAG); |
6474 | 0 | case ISD::ABS: |
6475 | 0 | case ISD::VP_ABS: |
6476 | 0 | return lowerABS(Op, DAG); |
6477 | 0 | case ISD::CTLZ: |
6478 | 0 | case ISD::CTLZ_ZERO_UNDEF: |
6479 | 0 | case ISD::CTTZ: |
6480 | 0 | case ISD::CTTZ_ZERO_UNDEF: |
6481 | 0 | if (Subtarget.hasStdExtZvbb()) |
6482 | 0 | return lowerToScalableOp(Op, DAG); |
6483 | 0 | assert(Op.getOpcode() != ISD::CTTZ); |
6484 | 0 | return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); |
6485 | 0 | case ISD::VSELECT: |
6486 | 0 | return lowerFixedLengthVectorSelectToRVV(Op, DAG); |
6487 | 0 | case ISD::FCOPYSIGN: |
6488 | 0 | if (Op.getValueType() == MVT::nxv32f16 && |
6489 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6490 | 0 | !Subtarget.hasVInstructionsF16())) |
6491 | 0 | return SplitVectorOp(Op, DAG); |
6492 | 0 | return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); |
6493 | 0 | case ISD::STRICT_FADD: |
6494 | 0 | case ISD::STRICT_FSUB: |
6495 | 0 | case ISD::STRICT_FMUL: |
6496 | 0 | case ISD::STRICT_FDIV: |
6497 | 0 | case ISD::STRICT_FSQRT: |
6498 | 0 | case ISD::STRICT_FMA: |
6499 | 0 | if (Op.getValueType() == MVT::nxv32f16 && |
6500 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6501 | 0 | !Subtarget.hasVInstructionsF16())) |
6502 | 0 | return SplitStrictFPVectorOp(Op, DAG); |
6503 | 0 | return lowerToScalableOp(Op, DAG); |
6504 | 0 | case ISD::STRICT_FSETCC: |
6505 | 0 | case ISD::STRICT_FSETCCS: |
6506 | 0 | return lowerVectorStrictFSetcc(Op, DAG); |
6507 | 0 | case ISD::STRICT_FCEIL: |
6508 | 0 | case ISD::STRICT_FRINT: |
6509 | 0 | case ISD::STRICT_FFLOOR: |
6510 | 0 | case ISD::STRICT_FTRUNC: |
6511 | 0 | case ISD::STRICT_FNEARBYINT: |
6512 | 0 | case ISD::STRICT_FROUND: |
6513 | 0 | case ISD::STRICT_FROUNDEVEN: |
6514 | 0 | return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
6515 | 0 | case ISD::MGATHER: |
6516 | 0 | case ISD::VP_GATHER: |
6517 | 0 | return lowerMaskedGather(Op, DAG); |
6518 | 0 | case ISD::MSCATTER: |
6519 | 0 | case ISD::VP_SCATTER: |
6520 | 0 | return lowerMaskedScatter(Op, DAG); |
6521 | 0 | case ISD::GET_ROUNDING: |
6522 | 0 | return lowerGET_ROUNDING(Op, DAG); |
6523 | 0 | case ISD::SET_ROUNDING: |
6524 | 0 | return lowerSET_ROUNDING(Op, DAG); |
6525 | 0 | case ISD::EH_DWARF_CFA: |
6526 | 0 | return lowerEH_DWARF_CFA(Op, DAG); |
6527 | 0 | case ISD::VP_SELECT: |
6528 | 0 | case ISD::VP_MERGE: |
6529 | 0 | case ISD::VP_ADD: |
6530 | 0 | case ISD::VP_SUB: |
6531 | 0 | case ISD::VP_MUL: |
6532 | 0 | case ISD::VP_SDIV: |
6533 | 0 | case ISD::VP_UDIV: |
6534 | 0 | case ISD::VP_SREM: |
6535 | 0 | case ISD::VP_UREM: |
6536 | 0 | return lowerVPOp(Op, DAG); |
6537 | 0 | case ISD::VP_AND: |
6538 | 0 | case ISD::VP_OR: |
6539 | 0 | case ISD::VP_XOR: |
6540 | 0 | return lowerLogicVPOp(Op, DAG); |
6541 | 0 | case ISD::VP_FADD: |
6542 | 0 | case ISD::VP_FSUB: |
6543 | 0 | case ISD::VP_FMUL: |
6544 | 0 | case ISD::VP_FDIV: |
6545 | 0 | case ISD::VP_FNEG: |
6546 | 0 | case ISD::VP_FABS: |
6547 | 0 | case ISD::VP_SQRT: |
6548 | 0 | case ISD::VP_FMA: |
6549 | 0 | case ISD::VP_FMINNUM: |
6550 | 0 | case ISD::VP_FMAXNUM: |
6551 | 0 | case ISD::VP_FCOPYSIGN: |
6552 | 0 | if (Op.getValueType() == MVT::nxv32f16 && |
6553 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6554 | 0 | !Subtarget.hasVInstructionsF16())) |
6555 | 0 | return SplitVPOp(Op, DAG); |
6556 | 0 | [[fallthrough]]; |
6557 | 0 | case ISD::VP_ASHR: |
6558 | 0 | case ISD::VP_LSHR: |
6559 | 0 | case ISD::VP_SHL: |
6560 | 0 | return lowerVPOp(Op, DAG); |
6561 | 0 | case ISD::VP_IS_FPCLASS: |
6562 | 0 | return LowerIS_FPCLASS(Op, DAG); |
6563 | 0 | case ISD::VP_SIGN_EXTEND: |
6564 | 0 | case ISD::VP_ZERO_EXTEND: |
6565 | 0 | if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) |
6566 | 0 | return lowerVPExtMaskOp(Op, DAG); |
6567 | 0 | return lowerVPOp(Op, DAG); |
6568 | 0 | case ISD::VP_TRUNCATE: |
6569 | 0 | return lowerVectorTruncLike(Op, DAG); |
6570 | 0 | case ISD::VP_FP_EXTEND: |
6571 | 0 | case ISD::VP_FP_ROUND: |
6572 | 0 | return lowerVectorFPExtendOrRoundLike(Op, DAG); |
6573 | 0 | case ISD::VP_SINT_TO_FP: |
6574 | 0 | case ISD::VP_UINT_TO_FP: |
6575 | 0 | if (Op.getValueType().isVector() && |
6576 | 0 | Op.getValueType().getScalarType() == MVT::f16 && |
6577 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6578 | 0 | !Subtarget.hasVInstructionsF16())) { |
6579 | 0 | if (Op.getValueType() == MVT::nxv32f16) |
6580 | 0 | return SplitVPOp(Op, DAG); |
6581 | | // int -> f32 |
6582 | 0 | SDLoc DL(Op); |
6583 | 0 | MVT NVT = |
6584 | 0 | MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()); |
6585 | 0 | auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops()); |
6586 | | // f32 -> f16 |
6587 | 0 | return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC, |
6588 | 0 | DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); |
6589 | 0 | } |
6590 | 0 | [[fallthrough]]; |
6591 | 0 | case ISD::VP_FP_TO_SINT: |
6592 | 0 | case ISD::VP_FP_TO_UINT: |
6593 | 0 | if (SDValue Op1 = Op.getOperand(0); |
6594 | 0 | Op1.getValueType().isVector() && |
6595 | 0 | Op1.getValueType().getScalarType() == MVT::f16 && |
6596 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6597 | 0 | !Subtarget.hasVInstructionsF16())) { |
6598 | 0 | if (Op1.getValueType() == MVT::nxv32f16) |
6599 | 0 | return SplitVPOp(Op, DAG); |
6600 | | // f16 -> f32 |
6601 | 0 | SDLoc DL(Op); |
6602 | 0 | MVT NVT = MVT::getVectorVT(MVT::f32, |
6603 | 0 | Op1.getValueType().getVectorElementCount()); |
6604 | 0 | SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1); |
6605 | | // f32 -> int |
6606 | 0 | return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), |
6607 | 0 | {WidenVec, Op.getOperand(1), Op.getOperand(2)}); |
6608 | 0 | } |
6609 | 0 | return lowerVPFPIntConvOp(Op, DAG); |
6610 | 0 | case ISD::VP_SETCC: |
6611 | 0 | if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 && |
6612 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6613 | 0 | !Subtarget.hasVInstructionsF16())) |
6614 | 0 | return SplitVPOp(Op, DAG); |
6615 | 0 | if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) |
6616 | 0 | return lowerVPSetCCMaskOp(Op, DAG); |
6617 | 0 | [[fallthrough]]; |
6618 | 0 | case ISD::VP_SMIN: |
6619 | 0 | case ISD::VP_SMAX: |
6620 | 0 | case ISD::VP_UMIN: |
6621 | 0 | case ISD::VP_UMAX: |
6622 | 0 | case ISD::VP_BITREVERSE: |
6623 | 0 | case ISD::VP_BSWAP: |
6624 | 0 | return lowerVPOp(Op, DAG); |
6625 | 0 | case ISD::VP_CTLZ: |
6626 | 0 | case ISD::VP_CTLZ_ZERO_UNDEF: |
6627 | 0 | if (Subtarget.hasStdExtZvbb()) |
6628 | 0 | return lowerVPOp(Op, DAG); |
6629 | 0 | return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); |
6630 | 0 | case ISD::VP_CTTZ: |
6631 | 0 | case ISD::VP_CTTZ_ZERO_UNDEF: |
6632 | 0 | if (Subtarget.hasStdExtZvbb()) |
6633 | 0 | return lowerVPOp(Op, DAG); |
6634 | 0 | return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); |
6635 | 0 | case ISD::VP_CTPOP: |
6636 | 0 | return lowerVPOp(Op, DAG); |
6637 | 0 | case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: |
6638 | 0 | return lowerVPStridedLoad(Op, DAG); |
6639 | 0 | case ISD::EXPERIMENTAL_VP_STRIDED_STORE: |
6640 | 0 | return lowerVPStridedStore(Op, DAG); |
6641 | 0 | case ISD::VP_FCEIL: |
6642 | 0 | case ISD::VP_FFLOOR: |
6643 | 0 | case ISD::VP_FRINT: |
6644 | 0 | case ISD::VP_FNEARBYINT: |
6645 | 0 | case ISD::VP_FROUND: |
6646 | 0 | case ISD::VP_FROUNDEVEN: |
6647 | 0 | case ISD::VP_FROUNDTOZERO: |
6648 | 0 | if (Op.getValueType() == MVT::nxv32f16 && |
6649 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
6650 | 0 | !Subtarget.hasVInstructionsF16())) |
6651 | 0 | return SplitVPOp(Op, DAG); |
6652 | 0 | return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); |
6653 | 0 | case ISD::EXPERIMENTAL_VP_SPLICE: |
6654 | 0 | return lowerVPSpliceExperimental(Op, DAG); |
6655 | 0 | case ISD::EXPERIMENTAL_VP_REVERSE: |
6656 | 0 | return lowerVPReverseExperimental(Op, DAG); |
6657 | 219k | } |
6658 | 219k | } |
6659 | | |
6660 | | static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, |
6661 | 125k | SelectionDAG &DAG, unsigned Flags) { |
6662 | 125k | return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); |
6663 | 125k | } |
6664 | | |
6665 | | static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty, |
6666 | 0 | SelectionDAG &DAG, unsigned Flags) { |
6667 | 0 | return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), |
6668 | 0 | Flags); |
6669 | 0 | } |
6670 | | |
6671 | | static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, |
6672 | 6 | SelectionDAG &DAG, unsigned Flags) { |
6673 | 6 | return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), |
6674 | 6 | N->getOffset(), Flags); |
6675 | 6 | } |
6676 | | |
6677 | | static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty, |
6678 | 0 | SelectionDAG &DAG, unsigned Flags) { |
6679 | 0 | return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); |
6680 | 0 | } |
6681 | | |
6682 | | template <class NodeTy> |
6683 | | SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
6684 | 62.6k | bool IsLocal, bool IsExternWeak) const { |
6685 | 62.6k | SDLoc DL(N); |
6686 | 62.6k | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6687 | | |
6688 | | // When HWASAN is used and tagging of global variables is enabled |
6689 | | // they should be accessed via the GOT, since the tagged address of a global |
6690 | | // is incompatible with existing code models. This also applies to non-pic |
6691 | | // mode. |
6692 | 62.6k | if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) { |
6693 | 0 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
6694 | 0 | if (IsLocal && !Subtarget.allowTaggedGlobals()) |
6695 | | // Use PC-relative addressing to access the symbol. This generates the |
6696 | | // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) |
6697 | | // %pcrel_lo(auipc)). |
6698 | 0 | return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); |
6699 | | |
6700 | | // Use PC-relative addressing to access the GOT for this symbol, then load |
6701 | | // the address from the GOT. This generates the pattern (PseudoLGA sym), |
6702 | | // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). |
6703 | 0 | SDValue Load = |
6704 | 0 | SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); |
6705 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
6706 | 0 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
6707 | 0 | MachinePointerInfo::getGOT(MF), |
6708 | 0 | MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
6709 | 0 | MachineMemOperand::MOInvariant, |
6710 | 0 | LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); |
6711 | 0 | DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); |
6712 | 0 | return Load; |
6713 | 0 | } |
6714 | | |
6715 | 62.6k | switch (getTargetMachine().getCodeModel()) { |
6716 | 0 | default: |
6717 | 0 | report_fatal_error("Unsupported code model for lowering"); |
6718 | 62.6k | case CodeModel::Small: { |
6719 | | // Generate a sequence for accessing addresses within the first 2 GiB of |
6720 | | // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). |
6721 | 62.6k | SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); |
6722 | 62.6k | SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); |
6723 | 62.6k | SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); |
6724 | 62.6k | return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo); |
6725 | 0 | } |
6726 | 0 | case CodeModel::Medium: { |
6727 | 0 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
6728 | 0 | if (IsExternWeak) { |
6729 | | // An extern weak symbol may be undefined, i.e. have value 0, which may |
6730 | | // not be within 2GiB of PC, so use GOT-indirect addressing to access the |
6731 | | // symbol. This generates the pattern (PseudoLGA sym), which expands to |
6732 | | // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). |
6733 | 0 | SDValue Load = |
6734 | 0 | SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0); |
6735 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
6736 | 0 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
6737 | 0 | MachinePointerInfo::getGOT(MF), |
6738 | 0 | MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
6739 | 0 | MachineMemOperand::MOInvariant, |
6740 | 0 | LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); |
6741 | 0 | DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); |
6742 | 0 | return Load; |
6743 | 0 | } |
6744 | | |
6745 | | // Generate a sequence for accessing addresses within any 2GiB range within |
6746 | | // the address space. This generates the pattern (PseudoLLA sym), which |
6747 | | // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). |
6748 | 0 | return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); |
6749 | 0 | } |
6750 | 62.6k | } |
6751 | 62.6k | }
Instantiations of llvm::RISCVTargetLowering::getAddr<NodeTy>:
  getAddr<llvm::GlobalAddressSDNode>: 62.6k executions
  getAddr<llvm::ConstantPoolSDNode>: 3 executions
  Unexecuted instantiation: getAddr<llvm::BlockAddressSDNode>
  Unexecuted instantiation: getAddr<llvm::JumpTableSDNode>
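For the CodeModel::Small path, the (addi (lui %hi(sym)) %lo(sym)) pattern works because %lo is the sign-extended low 12 bits of the address and %hi is adjusted so the pair sums back to the exact address. A small sketch of that split (splitHiLo is illustrative, not an LLVM API):

#include <cassert>
#include <cstdint>

// Model of the %hi/%lo split: addi sign-extends its 12-bit immediate, so %hi
// must absorb the carry when bit 11 of the address is set.
static void splitHiLo(uint32_t Addr) {
  int32_t Lo = static_cast<int32_t>(Addr & 0xFFF);
  if (Addr & 0x800)
    Lo -= 0x1000;                                 // sign-extended %lo(sym)
  uint32_t Hi = Addr - static_cast<uint32_t>(Lo); // the value lui materializes
  assert(Hi % 0x1000 == 0 && Hi + static_cast<uint32_t>(Lo) == Addr);
}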
6752 | | |
6753 | | SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, |
6754 | 62.6k | SelectionDAG &DAG) const { |
6755 | 62.6k | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); |
6756 | 62.6k | assert(N->getOffset() == 0 && "unexpected offset in global node"); |
6757 | 0 | const GlobalValue *GV = N->getGlobal(); |
6758 | 62.6k | return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage()); |
6759 | 62.6k | } |
6760 | | |
6761 | | SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, |
6762 | 0 | SelectionDAG &DAG) const { |
6763 | 0 | BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); |
6764 | |
6765 | 0 | return getAddr(N, DAG); |
6766 | 0 | } |
6767 | | |
6768 | | SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, |
6769 | 3 | SelectionDAG &DAG) const { |
6770 | 3 | ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); |
6771 | | |
6772 | 3 | return getAddr(N, DAG); |
6773 | 3 | } |
6774 | | |
6775 | | SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, |
6776 | 0 | SelectionDAG &DAG) const { |
6777 | 0 | JumpTableSDNode *N = cast<JumpTableSDNode>(Op); |
6778 | |
6779 | 0 | return getAddr(N, DAG); |
6780 | 0 | } |
6781 | | |
6782 | | SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
6783 | | SelectionDAG &DAG, |
6784 | 0 | bool UseGOT) const { |
6785 | 0 | SDLoc DL(N); |
6786 | 0 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6787 | 0 | const GlobalValue *GV = N->getGlobal(); |
6788 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
6789 | |
6790 | 0 | if (UseGOT) { |
6791 | | // Use PC-relative addressing to access the GOT for this TLS symbol, then |
6792 | | // load the address from the GOT and add the thread pointer. This generates |
6793 | | // the pattern (PseudoLA_TLS_IE sym), which expands to |
6794 | | // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). |
6795 | 0 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); |
6796 | 0 | SDValue Load = |
6797 | 0 | SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); |
6798 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
6799 | 0 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
6800 | 0 | MachinePointerInfo::getGOT(MF), |
6801 | 0 | MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
6802 | 0 | MachineMemOperand::MOInvariant, |
6803 | 0 | LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); |
6804 | 0 | DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp}); |
6805 | | |
6806 | | // Add the thread pointer. |
6807 | 0 | SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); |
6808 | 0 | return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); |
6809 | 0 | } |
6810 | | |
6811 | | // Generate a sequence for accessing the address relative to the thread |
6812 | | // pointer, with the appropriate adjustment for the thread pointer offset. |
6813 | | // This generates the pattern |
6814 | | // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) |
6815 | 0 | SDValue AddrHi = |
6816 | 0 | DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); |
6817 | 0 | SDValue AddrAdd = |
6818 | 0 | DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); |
6819 | 0 | SDValue AddrLo = |
6820 | 0 | DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); |
6821 | |
6822 | 0 | SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); |
6823 | 0 | SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); |
6824 | 0 | SDValue MNAdd = |
6825 | 0 | DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd); |
6826 | 0 | return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo); |
6827 | 0 | } |
6828 | | |
6829 | | SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
6830 | 0 | SelectionDAG &DAG) const { |
6831 | 0 | SDLoc DL(N); |
6832 | 0 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6833 | 0 | IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); |
6834 | 0 | const GlobalValue *GV = N->getGlobal(); |
6835 | | |
6836 | | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
6837 | | // This generates the pattern (PseudoLA_TLS_GD sym), which expands to |
6838 | | // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). |
6839 | 0 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); |
6840 | 0 | SDValue Load = |
6841 | 0 | SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); |
6842 | | |
6843 | | // Prepare argument list to generate call. |
6844 | 0 | ArgListTy Args; |
6845 | 0 | ArgListEntry Entry; |
6846 | 0 | Entry.Node = Load; |
6847 | 0 | Entry.Ty = CallTy; |
6848 | 0 | Args.push_back(Entry); |
6849 | | |
6850 | | // Setup call to __tls_get_addr. |
6851 | 0 | TargetLowering::CallLoweringInfo CLI(DAG); |
6852 | 0 | CLI.setDebugLoc(DL) |
6853 | 0 | .setChain(DAG.getEntryNode()) |
6854 | 0 | .setLibCallee(CallingConv::C, CallTy, |
6855 | 0 | DAG.getExternalSymbol("__tls_get_addr", Ty), |
6856 | 0 | std::move(Args)); |
6857 | |
6858 | 0 | return LowerCallTo(CLI).first; |
6859 | 0 | } |
6860 | | |
6861 | | SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
6862 | 0 | SelectionDAG &DAG) const { |
6863 | 0 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); |
6864 | 0 | assert(N->getOffset() == 0 && "unexpected offset in global node"); |
6865 | | |
6866 | 0 | if (DAG.getTarget().useEmulatedTLS()) |
6867 | 0 | return LowerToTLSEmulatedModel(N, DAG); |
6868 | | |
6869 | 0 | TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); |
6870 | |
6871 | 0 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
6872 | 0 | CallingConv::GHC) |
6873 | 0 | report_fatal_error("In GHC calling convention TLS is not supported"); |
6874 | |
6875 | 0 | SDValue Addr; |
6876 | 0 | switch (Model) { |
6877 | 0 | case TLSModel::LocalExec: |
6878 | 0 | Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); |
6879 | 0 | break; |
6880 | 0 | case TLSModel::InitialExec: |
6881 | 0 | Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); |
6882 | 0 | break; |
6883 | 0 | case TLSModel::LocalDynamic: |
6884 | 0 | case TLSModel::GeneralDynamic: |
6885 | 0 | Addr = getDynamicTLSAddr(N, DAG); |
6886 | 0 | break; |
6887 | 0 | } |
6888 | | |
6889 | 0 | return Addr; |
6890 | 0 | } |
6891 | | |
6892 | | // Return true if Val is equal to (setcc LHS, RHS, CC). |
6893 | | // Return false if Val is the inverse of (setcc LHS, RHS, CC). |
6894 | | // Otherwise, return std::nullopt. |
6895 | | static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, |
6896 | 0 | ISD::CondCode CC, SDValue Val) { |
6897 | 0 | assert(Val->getOpcode() == ISD::SETCC); |
6898 | 0 | SDValue LHS2 = Val.getOperand(0); |
6899 | 0 | SDValue RHS2 = Val.getOperand(1); |
6900 | 0 | ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get(); |
6901 | |
|
6902 | 0 | if (LHS == LHS2 && RHS == RHS2) { |
6903 | 0 | if (CC == CC2) |
6904 | 0 | return true; |
6905 | 0 | if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) |
6906 | 0 | return false; |
6907 | 0 | } else if (LHS == RHS2 && RHS == LHS2) { |
6908 | 0 | CC2 = ISD::getSetCCSwappedOperands(CC2); |
6909 | 0 | if (CC == CC2) |
6910 | 0 | return true; |
6911 | 0 | if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType())) |
6912 | 0 | return false; |
6913 | 0 | } |
6914 | | |
6915 | 0 | return std::nullopt; |
6916 | 0 | } |
6917 | | |
6918 | | static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, |
6919 | 663 | const RISCVSubtarget &Subtarget) { |
6920 | 663 | SDValue CondV = N->getOperand(0); |
6921 | 663 | SDValue TrueV = N->getOperand(1); |
6922 | 663 | SDValue FalseV = N->getOperand(2); |
6923 | 663 | MVT VT = N->getSimpleValueType(0); |
6924 | 663 | SDLoc DL(N); |
6925 | | |
6926 | 663 | if (!Subtarget.hasConditionalMoveFusion()) { |
6927 | | // (select c, -1, y) -> -c | y |
6928 | 663 | if (isAllOnesConstant(TrueV)) { |
6929 | 0 | SDValue Neg = DAG.getNegative(CondV, DL, VT); |
6930 | 0 | return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); |
6931 | 0 | } |
6932 | | // (select c, y, -1) -> (c-1) | y |
6933 | 663 | if (isAllOnesConstant(FalseV)) { |
6934 | 0 | SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, |
6935 | 0 | DAG.getAllOnesConstant(DL, VT)); |
6936 | 0 | return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); |
6937 | 0 | } |
6938 | | |
6939 | | // (select c, 0, y) -> (c-1) & y |
6940 | 663 | if (isNullConstant(TrueV)) { |
6941 | 663 | SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, |
6942 | 663 | DAG.getAllOnesConstant(DL, VT)); |
6943 | 663 | return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); |
6944 | 663 | } |
6945 | | // (select c, y, 0) -> -c & y |
6946 | 0 | if (isNullConstant(FalseV)) { |
6947 | 0 | SDValue Neg = DAG.getNegative(CondV, DL, VT); |
6948 | 0 | return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); |
6949 | 0 | } |
6950 | 0 | } |
6951 | | |
6952 | | // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops |
6953 | | // when both truev and falsev are also setcc. |
6954 | 0 | if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC && |
6955 | 0 | FalseV.getOpcode() == ISD::SETCC) { |
6956 | 0 | SDValue LHS = CondV.getOperand(0); |
6957 | 0 | SDValue RHS = CondV.getOperand(1); |
6958 | 0 | ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); |
6959 | | |
6960 | | // (select x, x, y) -> x | y |
6961 | | // (select !x, x, y) -> x & y |
6962 | 0 | if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) { |
6963 | 0 | return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV, |
6964 | 0 | FalseV); |
6965 | 0 | } |
6966 | | // (select x, y, x) -> x & y |
6967 | | // (select !x, y, x) -> x | y |
6968 | 0 | if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) { |
6969 | 0 | return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV, |
6970 | 0 | FalseV); |
6971 | 0 | } |
6972 | 0 | } |
6973 | | |
6974 | 0 | return SDValue(); |
6975 | 0 | } |
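The folds in combineSelectToBinOp assume the condition has already been materialized as 0 or 1; the identities can be checked directly (standalone sketch, names are illustrative):

#include <cassert>
#include <cstdint>

// (select c, 0, y) -> (c-1) & y   and   (select c, -1, y) -> -c | y,
// with c known to be 0 or 1.
static uint64_t selectZeroTrue(uint64_t C, uint64_t Y) { return (C - 1) & Y; }
static uint64_t selectAllOnesTrue(uint64_t C, uint64_t Y) { return (0 - C) | Y; }

int main() {
  for (uint64_t C : {uint64_t(0), uint64_t(1)}) {
    assert(selectZeroTrue(C, 0x1234) == (C ? 0 : 0x1234u));
    assert(selectAllOnesTrue(C, 0x1234) == (C ? ~uint64_t(0) : 0x1234u));
  }
  return 0;
}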
6976 | | |
6977 | | // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants |
6978 | | // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable. |
6979 | | // For now we only consider transformation profitable if `binOp(c0, c1)` ends up |
6980 | | // being `0` or `-1`. In such cases we can replace `select` with `and`. |
6981 | | // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize |
6982 | | // than `c0`? |
6983 | | static SDValue |
6984 | | foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, |
6985 | 0 | const RISCVSubtarget &Subtarget) { |
6986 | 0 | if (Subtarget.hasShortForwardBranchOpt()) |
6987 | 0 | return SDValue(); |
6988 | | |
6989 | 0 | unsigned SelOpNo = 0; |
6990 | 0 | SDValue Sel = BO->getOperand(0); |
6991 | 0 | if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { |
6992 | 0 | SelOpNo = 1; |
6993 | 0 | Sel = BO->getOperand(1); |
6994 | 0 | } |
6995 | |
6996 | 0 | if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) |
6997 | 0 | return SDValue(); |
6998 | | |
6999 | 0 | unsigned ConstSelOpNo = 1; |
7000 | 0 | unsigned OtherSelOpNo = 2; |
7001 | 0 | if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) { |
7002 | 0 | ConstSelOpNo = 2; |
7003 | 0 | OtherSelOpNo = 1; |
7004 | 0 | } |
7005 | 0 | SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo); |
7006 | 0 | ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp); |
7007 | 0 | if (!ConstSelOpNode || ConstSelOpNode->isOpaque()) |
7008 | 0 | return SDValue(); |
7009 | | |
7010 | 0 | SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1); |
7011 | 0 | ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp); |
7012 | 0 | if (!ConstBinOpNode || ConstBinOpNode->isOpaque()) |
7013 | 0 | return SDValue(); |
7014 | | |
7015 | 0 | SDLoc DL(Sel); |
7016 | 0 | EVT VT = BO->getValueType(0); |
7017 | |
7018 | 0 | SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp}; |
7019 | 0 | if (SelOpNo == 1) |
7020 | 0 | std::swap(NewConstOps[0], NewConstOps[1]); |
7021 | |
7022 | 0 | SDValue NewConstOp = |
7023 | 0 | DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps); |
7024 | 0 | if (!NewConstOp) |
7025 | 0 | return SDValue(); |
7026 | | |
7027 | 0 | const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); |
7028 | 0 | if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) |
7029 | 0 | return SDValue(); |
7030 | | |
7031 | 0 | SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo); |
7032 | 0 | SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp}; |
7033 | 0 | if (SelOpNo == 1) |
7034 | 0 | std::swap(NewNonConstOps[0], NewNonConstOps[1]); |
7035 | 0 | SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps); |
7036 | |
7037 | 0 | SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp; |
7038 | 0 | SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp; |
7039 | 0 | return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF); |
7040 | 0 | } |
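As a concrete instance of the fold above: (select cond, x, 0xF0) & 0x0F becomes select(cond, x & 0x0F, 0xF0 & 0x0F); since the constant operand folds to 0, the remaining select collapses to the -c & y form from combineSelectToBinOp. A standalone sketch under those assumptions (names are illustrative):

#include <cassert>
#include <cstdint>

static uint64_t beforeFold(bool Cond, uint64_t X) {
  return (Cond ? X : 0xF0u) & 0x0Fu;   // binOp(select(cond, x, c0), c1)
}
static uint64_t afterFold(bool Cond, uint64_t X) {
  uint64_t C = Cond;                   // condition materialized as 0/1
  return (0 - C) & (X & 0x0Fu);        // select(cond, x & c1, 0) -> -c & (x & c1)
}

int main() {
  for (bool Cond : {false, true})
    for (uint64_t X : {uint64_t(0x00), uint64_t(0xAB), uint64_t(0xFF)})
      assert(beforeFold(Cond, X) == afterFold(Cond, X));
  return 0;
}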
7041 | | |
7042 | 663 | SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
7043 | 663 | SDValue CondV = Op.getOperand(0); |
7044 | 663 | SDValue TrueV = Op.getOperand(1); |
7045 | 663 | SDValue FalseV = Op.getOperand(2); |
7046 | 663 | SDLoc DL(Op); |
7047 | 663 | MVT VT = Op.getSimpleValueType(); |
7048 | 663 | MVT XLenVT = Subtarget.getXLenVT(); |
7049 | | |
7050 | | // Lower vector SELECTs to VSELECTs by splatting the condition. |
7051 | 663 | if (VT.isVector()) { |
7052 | 0 | MVT SplatCondVT = VT.changeVectorElementType(MVT::i1); |
7053 | 0 | SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV); |
7054 | 0 | return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); |
7055 | 0 | } |
7056 | | |
7057 | | // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ |
7058 | | // nodes to implement the SELECT. Performing the lowering here allows for |
7059 | | // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless |
7060 | | // sequence or RISCVISD::SELECT_CC node (branch-based select). |
7061 | 663 | if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && |
7062 | 663 | VT.isScalarInteger()) { |
7063 | | // (select c, t, 0) -> (czero_eqz t, c) |
7064 | 0 | if (isNullConstant(FalseV)) |
7065 | 0 | return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV); |
7066 | | // (select c, 0, f) -> (czero_nez f, c) |
7067 | 0 | if (isNullConstant(TrueV)) |
7068 | 0 | return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV); |
7069 | | |
7070 | | // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c)) |
7071 | 0 | if (TrueV.getOpcode() == ISD::AND && |
7072 | 0 | (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) |
7073 | 0 | return DAG.getNode( |
7074 | 0 | ISD::OR, DL, VT, TrueV, |
7075 | 0 | DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); |
7076 | | // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x)) |
7077 | 0 | if (FalseV.getOpcode() == ISD::AND && |
7078 | 0 | (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) |
7079 | 0 | return DAG.getNode( |
7080 | 0 | ISD::OR, DL, VT, FalseV, |
7081 | 0 | DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV)); |
7082 | | |
7083 | | // Try some other optimizations before falling back to generic lowering. |
7084 | 0 | if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) |
7085 | 0 | return V; |
7086 | | |
7087 | | // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) |
7088 | | // Unless we have the short forward branch optimization. |
7089 | 0 | if (!Subtarget.hasConditionalMoveFusion()) |
7090 | 0 | return DAG.getNode( |
7091 | 0 | ISD::OR, DL, VT, |
7092 | 0 | DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), |
7093 | 0 | DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); |
7094 | 0 | } |
7095 | | |
7096 | 663 | if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) |
7097 | 663 | return V; |
7098 | | |
7099 | 0 | if (Op.hasOneUse()) { |
7100 | 0 | unsigned UseOpc = Op->use_begin()->getOpcode(); |
7101 | 0 | if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) { |
7102 | 0 | SDNode *BinOp = *Op->use_begin(); |
7103 | 0 | if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(), |
7104 | 0 | DAG, Subtarget)) { |
7105 | 0 | DAG.ReplaceAllUsesWith(BinOp, &NewSel); |
7106 | 0 | return lowerSELECT(NewSel, DAG); |
7107 | 0 | } |
7108 | 0 | } |
7109 | 0 | } |
7110 | | |
7111 | | // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc)) |
7112 | | // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1))) |
7113 | 0 | const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV); |
7114 | 0 | const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV); |
7115 | 0 | if (FPTV && FPFV) { |
7116 | 0 | if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0)) |
7117 | 0 | return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV); |
7118 | 0 | if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) { |
7119 | 0 | SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV, |
7120 | 0 | DAG.getConstant(1, DL, XLenVT)); |
7121 | 0 | return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR); |
7122 | 0 | } |
7123 | 0 | } |
7124 | | |
7125 | | // If the condition is not an integer SETCC which operates on XLenVT, we need |
7126 | | // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.: |
7127 | | // (select condv, truev, falsev) |
7128 | | // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) |
7129 | 0 | if (CondV.getOpcode() != ISD::SETCC || |
7130 | 0 | CondV.getOperand(0).getSimpleValueType() != XLenVT) { |
7131 | 0 | SDValue Zero = DAG.getConstant(0, DL, XLenVT); |
7132 | 0 | SDValue SetNE = DAG.getCondCode(ISD::SETNE); |
7133 | |
|
7134 | 0 | SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; |
7135 | |
|
7136 | 0 | return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); |
7137 | 0 | } |
7138 | | |
7139 | | // If the CondV is the output of a SETCC node which operates on XLenVT inputs, |
7140 | | // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take |
7141 | | // advantage of the integer compare+branch instructions. i.e.: |
7142 | | // (select (setcc lhs, rhs, cc), truev, falsev) |
7143 | | // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) |
7144 | 0 | SDValue LHS = CondV.getOperand(0); |
7145 | 0 | SDValue RHS = CondV.getOperand(1); |
7146 | 0 | ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); |
7147 | | |
7148 | | // Special case for a select of 2 constants that have a difference of 1. |
7149 | | // Normally this is done by DAGCombine, but if the select is introduced by |
7150 | | // type legalization or op legalization, we miss it. Restricting to SETLT |
7151 | | // case for now because that is what signed saturating add/sub need. |
7152 | | // FIXME: We don't need the condition to be SETLT or even a SETCC, |
7153 | | // but we would probably want to swap the true/false values if the condition |
7154 | | // is SETGE/SETLE to avoid an XORI. |
7155 | 0 | if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && |
7156 | 0 | CCVal == ISD::SETLT) { |
7157 | 0 | const APInt &TrueVal = TrueV->getAsAPIntVal(); |
7158 | 0 | const APInt &FalseVal = FalseV->getAsAPIntVal(); |
7159 | 0 | if (TrueVal - 1 == FalseVal) |
7160 | 0 | return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV); |
7161 | 0 | if (TrueVal + 1 == FalseVal) |
7162 | 0 | return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV); |
7163 | 0 | } |
7164 | | |
7165 | 0 | translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); |
7166 | | // 1 < x ? x : 1 -> 0 < x ? x : 1 |
7167 | 0 | if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) && |
7168 | 0 | RHS == TrueV && LHS == FalseV) { |
7169 | 0 | LHS = DAG.getConstant(0, DL, VT); |
7170 | | // 0 <u x is the same as x != 0. |
7171 | 0 | if (CCVal == ISD::SETULT) { |
7172 | 0 | std::swap(LHS, RHS); |
7173 | 0 | CCVal = ISD::SETNE; |
7174 | 0 | } |
7175 | 0 | } |
7176 | | |
7177 | | // x <s -1 ? x : -1 -> x <s 0 ? x : -1 |
7178 | 0 | if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV && |
7179 | 0 | RHS == FalseV) { |
7180 | 0 | RHS = DAG.getConstant(0, DL, VT); |
7181 | 0 | } |
7182 | |
|
7183 | 0 | SDValue TargetCC = DAG.getCondCode(CCVal); |
7184 | |
|
7185 | 0 | if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) { |
7186 | | // (select (setcc lhs, rhs, CC), constant, falsev) |
7187 | | // -> (select (setcc lhs, rhs, InverseCC), falsev, constant) |
7188 | 0 | std::swap(TrueV, FalseV); |
7189 | 0 | TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType())); |
7190 | 0 | } |
7191 | |
|
7192 | 0 | SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; |
7193 | 0 | return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); |
7194 | 0 | } |
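
As an illustrative aside (not part of the listed source; the helper names are invented), a minimal standalone C++ sketch of the branchless identity the CZERO comments above describe: czero.eqz zeroes its value when the condition is zero, czero.nez zeroes it when the condition is non-zero, and OR-ing the two halves reproduces the select.

#include <cassert>
#include <cstdint>

// Scalar model of (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)).
static uint64_t czero_eqz(uint64_t Val, uint64_t Cond) {
  return Cond == 0 ? 0 : Val; // result is zeroed when Cond == 0
}
static uint64_t czero_nez(uint64_t Val, uint64_t Cond) {
  return Cond != 0 ? 0 : Val; // result is zeroed when Cond != 0
}
static uint64_t branchlessSelect(uint64_t C, uint64_t T, uint64_t F) {
  return czero_eqz(T, C) | czero_nez(F, C);
}

int main() {
  assert(branchlessSelect(1, 10, 20) == 10);
  assert(branchlessSelect(0, 10, 20) == 20);
  return 0;
}
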
7195 | | |
7196 | 2.98k | SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { |
7197 | 2.98k | SDValue CondV = Op.getOperand(1); |
7198 | 2.98k | SDLoc DL(Op); |
7199 | 2.98k | MVT XLenVT = Subtarget.getXLenVT(); |
7200 | | |
7201 | 2.98k | if (CondV.getOpcode() == ISD::SETCC && |
7202 | 2.98k | CondV.getOperand(0).getValueType() == XLenVT) { |
7203 | 1.01k | SDValue LHS = CondV.getOperand(0); |
7204 | 1.01k | SDValue RHS = CondV.getOperand(1); |
7205 | 1.01k | ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); |
7206 | | |
7207 | 1.01k | translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); |
7208 | | |
7209 | 1.01k | SDValue TargetCC = DAG.getCondCode(CCVal); |
7210 | 1.01k | return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), |
7211 | 1.01k | LHS, RHS, TargetCC, Op.getOperand(2)); |
7212 | 1.01k | } |
7213 | | |
7214 | 1.96k | return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), |
7215 | 1.96k | CondV, DAG.getConstant(0, DL, XLenVT), |
7216 | 1.96k | DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); |
7217 | 2.98k | } |
7218 | | |
7219 | 0 | SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { |
7220 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
7221 | 0 | RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); |
7222 | |
|
7223 | 0 | SDLoc DL(Op); |
7224 | 0 | SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), |
7225 | 0 | getPointerTy(MF.getDataLayout())); |
7226 | | |
7227 | | // vastart just stores the address of the VarArgsFrameIndex slot into the |
7228 | | // memory location argument. |
7229 | 0 | const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); |
7230 | 0 | return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), |
7231 | 0 | MachinePointerInfo(SV)); |
7232 | 0 | } |
7233 | | |
7234 | | SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, |
7235 | 0 | SelectionDAG &DAG) const { |
7236 | 0 | const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); |
7237 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
7238 | 0 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7239 | 0 | MFI.setFrameAddressIsTaken(true); |
7240 | 0 | Register FrameReg = RI.getFrameRegister(MF); |
7241 | 0 | int XLenInBytes = Subtarget.getXLen() / 8; |
7242 | |
|
7243 | 0 | EVT VT = Op.getValueType(); |
7244 | 0 | SDLoc DL(Op); |
7245 | 0 | SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); |
7246 | 0 | unsigned Depth = Op.getConstantOperandVal(0); |
7247 | 0 | while (Depth--) { |
7248 | 0 | int Offset = -(XLenInBytes * 2); |
7249 | 0 | SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, |
7250 | 0 | DAG.getIntPtrConstant(Offset, DL)); |
7251 | 0 | FrameAddr = |
7252 | 0 | DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); |
7253 | 0 | } |
7254 | 0 | return FrameAddr; |
7255 | 0 | } |
7256 | | |
7257 | | SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, |
7258 | 0 | SelectionDAG &DAG) const { |
7259 | 0 | const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); |
7260 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
7261 | 0 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7262 | 0 | MFI.setReturnAddressIsTaken(true); |
7263 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
7264 | 0 | int XLenInBytes = Subtarget.getXLen() / 8; |
7265 | |
|
7266 | 0 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
7267 | 0 | return SDValue(); |
7268 | | |
7269 | 0 | EVT VT = Op.getValueType(); |
7270 | 0 | SDLoc DL(Op); |
7271 | 0 | unsigned Depth = Op.getConstantOperandVal(0); |
7272 | 0 | if (Depth) { |
7273 | 0 | int Off = -XLenInBytes; |
7274 | 0 | SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); |
7275 | 0 | SDValue Offset = DAG.getConstant(Off, DL, VT); |
7276 | 0 | return DAG.getLoad(VT, DL, DAG.getEntryNode(), |
7277 | 0 | DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), |
7278 | 0 | MachinePointerInfo()); |
7279 | 0 | } |
7280 | | |
7281 | | // Return the value of the return address register, marking it an implicit |
7282 | | // live-in. |
7283 | 0 | Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); |
7284 | 0 | return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); |
7285 | 0 | } |
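
The offsets used by lowerFRAMEADDR and lowerRETURNADDR above assume the usual RISC-V frame record when a frame pointer is kept: the saved return address at fp - XLEN/8 and the caller's frame pointer at fp - 2*XLEN/8. A small sketch under that assumption (not from the source; the memory layout and names are invented for illustration), with frames modelled as word indices into a flat array:

#include <cassert>
#include <cstdint>

// Walk the modelled frame chain: slot Fp-2 holds the caller's frame pointer,
// slot Fp-1 holds the saved return address (XLEN/8-sized slots).
static uint64_t frameAddress(const uint64_t *Mem, uint64_t Fp, unsigned Depth) {
  while (Depth--)
    Fp = Mem[Fp - 2]; // load the caller's fp, offset -2 * XLenInBytes
  return Fp;
}

static uint64_t returnAddress(const uint64_t *Mem, uint64_t Fp, unsigned Depth) {
  return Mem[frameAddress(Mem, Fp, Depth) - 1]; // saved ra, offset -XLenInBytes
}

int main() {
  uint64_t Mem[16] = {};
  // Outer frame at fp = 8, inner frame at fp = 12 (called from the outer one).
  Mem[8 - 1] = 0x5678;  // outer frame's saved ra
  Mem[12 - 1] = 0x1234; // inner frame's saved ra
  Mem[12 - 2] = 8;      // inner frame's copy of the outer fp
  assert(returnAddress(Mem, 12, 0) == 0x1234);
  assert(returnAddress(Mem, 12, 1) == 0x5678);
  return 0;
}
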
7286 | | |
7287 | | SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, |
7288 | 0 | SelectionDAG &DAG) const { |
7289 | 0 | SDLoc DL(Op); |
7290 | 0 | SDValue Lo = Op.getOperand(0); |
7291 | 0 | SDValue Hi = Op.getOperand(1); |
7292 | 0 | SDValue Shamt = Op.getOperand(2); |
7293 | 0 | EVT VT = Lo.getValueType(); |
7294 | | |
7295 | | // if Shamt-XLEN < 0: // Shamt < XLEN |
7296 | | // Lo = Lo << Shamt |
7297 | | // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) |
7298 | | // else: |
7299 | | // Lo = 0 |
7300 | | // Hi = Lo << (Shamt-XLEN) |
7301 | |
|
7302 | 0 | SDValue Zero = DAG.getConstant(0, DL, VT); |
7303 | 0 | SDValue One = DAG.getConstant(1, DL, VT); |
7304 | 0 | SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); |
7305 | 0 | SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); |
7306 | 0 | SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); |
7307 | 0 | SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); |
7308 | |
|
7309 | 0 | SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); |
7310 | 0 | SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); |
7311 | 0 | SDValue ShiftRightLo = |
7312 | 0 | DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); |
7313 | 0 | SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); |
7314 | 0 | SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); |
7315 | 0 | SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); |
7316 | |
|
7317 | 0 | SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); |
7318 | |
|
7319 | 0 | Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); |
7320 | 0 | Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); |
7321 | |
|
7322 | 0 | SDValue Parts[2] = {Lo, Hi}; |
7323 | 0 | return DAG.getMergeValues(Parts, DL); |
7324 | 0 | } |
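
A standalone C++ sketch (not from the source) of the shift-left-parts expansion commented above, instantiated for XLEN = 64 so a 128-bit value is carried in two uint64_t parts:

#include <cassert>
#include <cstdint>
#include <utility>

// 128-bit shift left, following the expansion above:
//   Shamt < 64:  Lo' = Lo << Shamt
//                Hi' = (Hi << Shamt) | ((Lo >> 1) >> (63 - Shamt))
//   otherwise:   Lo' = 0
//                Hi' = Lo << (Shamt - 64)
static std::pair<uint64_t, uint64_t> shl128(uint64_t Lo, uint64_t Hi,
                                            unsigned Shamt) {
  if (Shamt < 64) {
    uint64_t NewLo = Lo << Shamt;
    uint64_t NewHi = (Hi << Shamt) | ((Lo >> 1) >> (63 - Shamt));
    return {NewLo, NewHi};
  }
  return {0, Lo << (Shamt - 64)};
}

int main() {
  auto [Lo, Hi] = shl128(0xFFFFFFFFFFFFFFFFULL, 0xFFULL, 8);
  assert(Lo == 0xFFFFFFFFFFFFFF00ULL && Hi == 0xFFFFULL);
  auto [Lo2, Hi2] = shl128(1, 0, 65); // shift amounts >= 64 move Lo into Hi
  assert(Lo2 == 0 && Hi2 == 2);
  return 0;
}

Note how the (Lo >> 1) >> (63 - Shamt) term keeps both shift amounts strictly below 64, which is exactly why the sequence above never needs a shift by XLEN.
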
7325 | | |
7326 | | SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, |
7327 | 0 | bool IsSRA) const { |
7328 | 0 | SDLoc DL(Op); |
7329 | 0 | SDValue Lo = Op.getOperand(0); |
7330 | 0 | SDValue Hi = Op.getOperand(1); |
7331 | 0 | SDValue Shamt = Op.getOperand(2); |
7332 | 0 | EVT VT = Lo.getValueType(); |
7333 | | |
7334 | | // SRA expansion: |
7335 | | // if Shamt-XLEN < 0: // Shamt < XLEN |
7336 | | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) |
7337 | | // Hi = Hi >>s Shamt |
7338 | | // else: |
7339 | | // Lo = Hi >>s (Shamt-XLEN); |
7340 | | // Hi = Hi >>s (XLEN-1) |
7341 | | // |
7342 | | // SRL expansion: |
7343 | | // if Shamt-XLEN < 0: // Shamt < XLEN |
7344 | | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt)) |
7345 | | // Hi = Hi >>u Shamt |
7346 | | // else: |
7347 | | // Lo = Hi >>u (Shamt-XLEN); |
7348 | | // Hi = 0; |
7349 | |
|
7350 | 0 | unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
7351 | |
|
7352 | 0 | SDValue Zero = DAG.getConstant(0, DL, VT); |
7353 | 0 | SDValue One = DAG.getConstant(1, DL, VT); |
7354 | 0 | SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); |
7355 | 0 | SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); |
7356 | 0 | SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); |
7357 | 0 | SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); |
7358 | |
|
7359 | 0 | SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); |
7360 | 0 | SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); |
7361 | 0 | SDValue ShiftLeftHi = |
7362 | 0 | DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); |
7363 | 0 | SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); |
7364 | 0 | SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); |
7365 | 0 | SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); |
7366 | 0 | SDValue HiFalse = |
7367 | 0 | IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; |
7368 | |
|
7369 | 0 | SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); |
7370 | |
|
7371 | 0 | Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); |
7372 | 0 | Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); |
7373 | |
|
7374 | 0 | SDValue Parts[2] = {Lo, Hi}; |
7375 | 0 | return DAG.getMergeValues(Parts, DL); |
7376 | 0 | } |
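
The matching sketch for the arithmetic variant (again not from the source), with XLEN = 64:

#include <cassert>
#include <cstdint>
#include <utility>

// 128-bit arithmetic shift right, per the SRA expansion above (">>s" means an
// arithmetic shift, modelled here with a signed Hi part):
//   Shamt < 64:  Lo' = (Lo >> Shamt) | ((Hi << 1) << (63 - Shamt))
//                Hi' = Hi >>s Shamt
//   otherwise:   Lo' = Hi >>s (Shamt - 64)
//                Hi' = Hi >>s 63
static std::pair<uint64_t, int64_t> sra128(uint64_t Lo, int64_t Hi,
                                           unsigned Shamt) {
  if (Shamt < 64) {
    uint64_t NewLo = (Lo >> Shamt) | ((uint64_t(Hi) << 1) << (63 - Shamt));
    return {NewLo, Hi >> Shamt};
  }
  return {uint64_t(Hi >> (Shamt - 64)), Hi >> 63};
}

int main() {
  // The 128-bit value -2 shifted right by 1 is -1 in both halves.
  auto [Lo, Hi] = sra128(0xFFFFFFFFFFFFFFFEULL, -1, 1);
  assert(Lo == 0xFFFFFFFFFFFFFFFFULL && Hi == -1);
  // Shift amounts >= 64 move the (sign-extended) Hi part into Lo.
  auto [Lo2, Hi2] = sra128(0, INT64_MIN, 64);
  assert(Lo2 == 0x8000000000000000ULL && Hi2 == -1);
  return 0;
}
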
7377 | | |
7378 | | // Lower splats of i1 types to SETCC. For each mask vector type, we have a |
7379 | | // legal equivalently-sized i8 type, so we can use that as a go-between. |
7380 | | SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, |
7381 | 0 | SelectionDAG &DAG) const { |
7382 | 0 | SDLoc DL(Op); |
7383 | 0 | MVT VT = Op.getSimpleValueType(); |
7384 | 0 | SDValue SplatVal = Op.getOperand(0); |
7385 | | // All-zeros or all-ones splats are handled specially. |
7386 | 0 | if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) { |
7387 | 0 | SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; |
7388 | 0 | return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL); |
7389 | 0 | } |
7390 | 0 | if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) { |
7391 | 0 | SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; |
7392 | 0 | return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL); |
7393 | 0 | } |
7394 | 0 | MVT InterVT = VT.changeVectorElementType(MVT::i8); |
7395 | 0 | SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal, |
7396 | 0 | DAG.getConstant(1, DL, SplatVal.getValueType())); |
7397 | 0 | SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal); |
7398 | 0 | SDValue Zero = DAG.getConstant(0, DL, InterVT); |
7399 | 0 | return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE); |
7400 | 0 | } |
7401 | | |
7402 | | // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is |
7403 | | // illegal (currently only vXi64 RV32). |
7404 | | // FIXME: We could also catch non-constant sign-extended i32 values and lower |
7405 | | // them to VMV_V_X_VL. |
7406 | | SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, |
7407 | 0 | SelectionDAG &DAG) const { |
7408 | 0 | SDLoc DL(Op); |
7409 | 0 | MVT VecVT = Op.getSimpleValueType(); |
7410 | 0 | assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && |
7411 | 0 | "Unexpected SPLAT_VECTOR_PARTS lowering"); |
7412 | | |
7413 | 0 | assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); |
7414 | 0 | SDValue Lo = Op.getOperand(0); |
7415 | 0 | SDValue Hi = Op.getOperand(1); |
7416 | |
|
7417 | 0 | MVT ContainerVT = VecVT; |
7418 | 0 | if (VecVT.isFixedLengthVector()) |
7419 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
7420 | |
|
7421 | 0 | auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; |
7422 | |
|
7423 | 0 | SDValue Res = |
7424 | 0 | splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); |
7425 | |
|
7426 | 0 | if (VecVT.isFixedLengthVector()) |
7427 | 0 | Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget); |
7428 | |
|
7429 | 0 | return Res; |
7430 | 0 | } |
7431 | | |
7432 | | // Custom-lower extensions from mask vectors by using a vselect either with 1 |
7433 | | // for zero/any-extension or -1 for sign-extension: |
7434 | | // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) |
7435 | | // Note that any-extension is lowered identically to zero-extension. |
7436 | | SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, |
7437 | 0 | int64_t ExtTrueVal) const { |
7438 | 0 | SDLoc DL(Op); |
7439 | 0 | MVT VecVT = Op.getSimpleValueType(); |
7440 | 0 | SDValue Src = Op.getOperand(0); |
7441 | | // Only custom-lower extensions from mask types |
7442 | 0 | assert(Src.getValueType().isVector() && |
7443 | 0 | Src.getValueType().getVectorElementType() == MVT::i1); |
7444 | | |
7445 | 0 | if (VecVT.isScalableVector()) { |
7446 | 0 | SDValue SplatZero = DAG.getConstant(0, DL, VecVT); |
7447 | 0 | SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT); |
7448 | 0 | return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); |
7449 | 0 | } |
7450 | | |
7451 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VecVT); |
7452 | 0 | MVT I1ContainerVT = |
7453 | 0 | MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
7454 | |
|
7455 | 0 | SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); |
7456 | |
|
7457 | 0 | SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; |
7458 | |
|
7459 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
7460 | 0 | SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); |
7461 | 0 | SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); |
7462 | |
|
7463 | 0 | SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
7464 | 0 | DAG.getUNDEF(ContainerVT), SplatZero, VL); |
7465 | 0 | SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
7466 | 0 | DAG.getUNDEF(ContainerVT), SplatTrueVal, VL); |
7467 | 0 | SDValue Select = |
7468 | 0 | DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal, |
7469 | 0 | SplatZero, DAG.getUNDEF(ContainerVT), VL); |
7470 | |
|
7471 | 0 | return convertFromScalableVector(VecVT, Select, DAG, Subtarget); |
7472 | 0 | } |
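
A per-lane model (not from the source) of the extension rule quoted above: every mask bit selects between ExtTrueVal (-1 for sign-extension, 1 for zero- or any-extension) and 0.

#include <cassert>
#include <cstdint>
#include <vector>

// (vXiN = (s|z)ext vXi1 vmask) -> (vselect vmask, ExtTrueVal, 0), per lane.
static std::vector<int64_t> extendMask(const std::vector<bool> &Mask,
                                       int64_t ExtTrueVal) {
  std::vector<int64_t> Out;
  for (bool Bit : Mask)
    Out.push_back(Bit ? ExtTrueVal : 0);
  return Out;
}

int main() {
  std::vector<bool> M = {true, false, true};
  assert(extendMask(M, -1) == (std::vector<int64_t>{-1, 0, -1})); // sext
  assert(extendMask(M, 1) == (std::vector<int64_t>{1, 0, 1}));    // zext/aext
  return 0;
}
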
7473 | | |
7474 | | SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( |
7475 | 0 | SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { |
7476 | 0 | MVT ExtVT = Op.getSimpleValueType(); |
7477 | | // Only custom-lower extensions from fixed-length vector types. |
7478 | 0 | if (!ExtVT.isFixedLengthVector()) |
7479 | 0 | return Op; |
7480 | 0 | MVT VT = Op.getOperand(0).getSimpleValueType(); |
7481 | | // Grab the canonical container type for the extended type. Infer the smaller |
7482 | | // type from that to ensure the same number of vector elements, as we know |
7483 | | // the LMUL will be sufficient to hold the smaller type. |
7484 | 0 | MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); |
7485 | | // Get the extended container type manually to ensure the same number of |
7486 | | // vector elements between source and dest. |
7487 | 0 | MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), |
7488 | 0 | ContainerExtVT.getVectorElementCount()); |
7489 | |
|
7490 | 0 | SDValue Op1 = |
7491 | 0 | convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); |
7492 | |
|
7493 | 0 | SDLoc DL(Op); |
7494 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
7495 | |
|
7496 | 0 | SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); |
7497 | |
|
7498 | 0 | return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); |
7499 | 0 | } |
7500 | | |
7501 | | // Custom-lower truncations from vectors to mask vectors by using a mask and a |
7502 | | // setcc operation: |
7503 | | // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) |
7504 | | SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, |
7505 | 0 | SelectionDAG &DAG) const { |
7506 | 0 | bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; |
7507 | 0 | SDLoc DL(Op); |
7508 | 0 | EVT MaskVT = Op.getValueType(); |
7509 | | // Only expect to custom-lower truncations to mask types |
7510 | 0 | assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && |
7511 | 0 | "Unexpected type for vector mask lowering"); |
7512 | 0 | SDValue Src = Op.getOperand(0); |
7513 | 0 | MVT VecVT = Src.getSimpleValueType(); |
7514 | 0 | SDValue Mask, VL; |
7515 | 0 | if (IsVPTrunc) { |
7516 | 0 | Mask = Op.getOperand(1); |
7517 | 0 | VL = Op.getOperand(2); |
7518 | 0 | } |
7519 | | // If this is a fixed vector, we need to convert it to a scalable vector. |
7520 | 0 | MVT ContainerVT = VecVT; |
7521 | |
|
7522 | 0 | if (VecVT.isFixedLengthVector()) { |
7523 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
7524 | 0 | Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); |
7525 | 0 | if (IsVPTrunc) { |
7526 | 0 | MVT MaskContainerVT = |
7527 | 0 | getContainerForFixedLengthVector(Mask.getSimpleValueType()); |
7528 | 0 | Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); |
7529 | 0 | } |
7530 | 0 | } |
7531 | |
|
7532 | 0 | if (!IsVPTrunc) { |
7533 | 0 | std::tie(Mask, VL) = |
7534 | 0 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
7535 | 0 | } |
7536 | |
|
7537 | 0 | SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); |
7538 | 0 | SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); |
7539 | |
|
7540 | 0 | SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
7541 | 0 | DAG.getUNDEF(ContainerVT), SplatOne, VL); |
7542 | 0 | SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
7543 | 0 | DAG.getUNDEF(ContainerVT), SplatZero, VL); |
7544 | |
|
7545 | 0 | MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); |
7546 | 0 | SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, |
7547 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL); |
7548 | 0 | Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, |
7549 | 0 | {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE), |
7550 | 0 | DAG.getUNDEF(MaskContainerVT), Mask, VL}); |
7551 | 0 | if (MaskVT.isFixedLengthVector()) |
7552 | 0 | Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); |
7553 | 0 | return Trunc; |
7554 | 0 | } |
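
The inverse direction, again as a per-lane model (not from the source): only bit 0 of each source element survives a truncation to i1.

#include <cassert>
#include <cstdint>
#include <vector>

// (vXi1 = trunc vXiN vec) -> (setcc (and vec, 1), 0, ne), per lane.
static std::vector<bool> truncToMask(const std::vector<uint64_t> &Vec) {
  std::vector<bool> Out;
  for (uint64_t Elt : Vec)
    Out.push_back((Elt & 1) != 0);
  return Out;
}

int main() {
  std::vector<bool> Expected = {false, true, false, true};
  assert(truncToMask({2, 3, 0, 5}) == Expected);
  return 0;
}
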
7555 | | |
7556 | | SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, |
7557 | 0 | SelectionDAG &DAG) const { |
7558 | 0 | bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; |
7559 | 0 | SDLoc DL(Op); |
7560 | |
|
7561 | 0 | MVT VT = Op.getSimpleValueType(); |
7562 | | // Only custom-lower vector truncates |
7563 | 0 | assert(VT.isVector() && "Unexpected type for vector truncate lowering"); |
7564 | | |
7565 | | // Truncates to mask types are handled differently |
7566 | 0 | if (VT.getVectorElementType() == MVT::i1) |
7567 | 0 | return lowerVectorMaskTruncLike(Op, DAG); |
7568 | | |
7569 | | // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary |
7570 | | // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which |
7571 | | // truncate by one power of two at a time. |
7572 | 0 | MVT DstEltVT = VT.getVectorElementType(); |
7573 | |
|
7574 | 0 | SDValue Src = Op.getOperand(0); |
7575 | 0 | MVT SrcVT = Src.getSimpleValueType(); |
7576 | 0 | MVT SrcEltVT = SrcVT.getVectorElementType(); |
7577 | |
|
7578 | 0 | assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && |
7579 | 0 | isPowerOf2_64(SrcEltVT.getSizeInBits()) && |
7580 | 0 | "Unexpected vector truncate lowering"); |
7581 | | |
7582 | 0 | MVT ContainerVT = SrcVT; |
7583 | 0 | SDValue Mask, VL; |
7584 | 0 | if (IsVPTrunc) { |
7585 | 0 | Mask = Op.getOperand(1); |
7586 | 0 | VL = Op.getOperand(2); |
7587 | 0 | } |
7588 | 0 | if (SrcVT.isFixedLengthVector()) { |
7589 | 0 | ContainerVT = getContainerForFixedLengthVector(SrcVT); |
7590 | 0 | Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); |
7591 | 0 | if (IsVPTrunc) { |
7592 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
7593 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
7594 | 0 | } |
7595 | 0 | } |
7596 | |
|
7597 | 0 | SDValue Result = Src; |
7598 | 0 | if (!IsVPTrunc) { |
7599 | 0 | std::tie(Mask, VL) = |
7600 | 0 | getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); |
7601 | 0 | } |
7602 | |
|
7603 | 0 | LLVMContext &Context = *DAG.getContext(); |
7604 | 0 | const ElementCount Count = ContainerVT.getVectorElementCount(); |
7605 | 0 | do { |
7606 | 0 | SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); |
7607 | 0 | EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); |
7608 | 0 | Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, |
7609 | 0 | Mask, VL); |
7610 | 0 | } while (SrcEltVT != DstEltVT); |
7611 | |
|
7612 | 0 | if (SrcVT.isFixedLengthVector()) |
7613 | 0 | Result = convertFromScalableVector(VT, Result, DAG, Subtarget); |
7614 | |
|
7615 | 0 | return Result; |
7616 | 0 | } |
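
An illustrative scalar sketch (not from the source) of the loop above: an arbitrary narrowing is performed as repeated halvings of the element width, one TRUNCATE_VECTOR_VL step per halving.

#include <cassert>
#include <cstdint>

// Truncate from SrcBits to DstBits by halving the width one step at a time,
// keeping only the low half of the bits at each step.
static uint64_t truncateByHalving(uint64_t Val, unsigned SrcBits,
                                  unsigned DstBits) {
  while (SrcBits != DstBits) {
    SrcBits /= 2; // SEW*2 -> SEW
    Val &= (SrcBits >= 64) ? ~0ULL : ((1ULL << SrcBits) - 1);
  }
  return Val;
}

int main() {
  // i64 -> i8 takes three steps: 64 -> 32 -> 16 -> 8.
  assert(truncateByHalving(0x1122334455667788ULL, 64, 8) == 0x88);
  // i32 -> i16 is a single step.
  assert(truncateByHalving(0xDEADBEEFULL, 32, 16) == 0xBEEF);
  return 0;
}
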
7617 | | |
7618 | | SDValue |
7619 | | RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op, |
7620 | 0 | SelectionDAG &DAG) const { |
7621 | 0 | SDLoc DL(Op); |
7622 | 0 | SDValue Chain = Op.getOperand(0); |
7623 | 0 | SDValue Src = Op.getOperand(1); |
7624 | 0 | MVT VT = Op.getSimpleValueType(); |
7625 | 0 | MVT SrcVT = Src.getSimpleValueType(); |
7626 | 0 | MVT ContainerVT = VT; |
7627 | 0 | if (VT.isFixedLengthVector()) { |
7628 | 0 | MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); |
7629 | 0 | ContainerVT = |
7630 | 0 | SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); |
7631 | 0 | Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); |
7632 | 0 | } |
7633 | |
|
7634 | 0 | auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); |
7635 | | |
7636 | | // RVV can only widen/truncate fp to types double or half the size of the source. |
7637 | 0 | if ((VT.getVectorElementType() == MVT::f64 && |
7638 | 0 | SrcVT.getVectorElementType() == MVT::f16) || |
7639 | 0 | (VT.getVectorElementType() == MVT::f16 && |
7640 | 0 | SrcVT.getVectorElementType() == MVT::f64)) { |
7641 | | // To avoid double-rounding errors, the intermediate rounding should be round-to-odd. |
7642 | 0 | unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND |
7643 | 0 | ? RISCVISD::STRICT_FP_EXTEND_VL |
7644 | 0 | : RISCVISD::STRICT_VFNCVT_ROD_VL; |
7645 | 0 | MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); |
7646 | 0 | Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other), |
7647 | 0 | Chain, Src, Mask, VL); |
7648 | 0 | Chain = Src.getValue(1); |
7649 | 0 | } |
7650 | |
|
7651 | 0 | unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND |
7652 | 0 | ? RISCVISD::STRICT_FP_EXTEND_VL |
7653 | 0 | : RISCVISD::STRICT_FP_ROUND_VL; |
7654 | 0 | SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), |
7655 | 0 | Chain, Src, Mask, VL); |
7656 | 0 | if (VT.isFixedLengthVector()) { |
7657 | | // StrictFP operations have two result values, so their lowered result must |
7658 | | | // also have two results (the value and the chain). |
7659 | 0 | SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget); |
7660 | 0 | Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL); |
7661 | 0 | } |
7662 | 0 | return Res; |
7663 | 0 | } |
7664 | | |
7665 | | SDValue |
7666 | | RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, |
7667 | 0 | SelectionDAG &DAG) const { |
7668 | 0 | bool IsVP = |
7669 | 0 | Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; |
7670 | 0 | bool IsExtend = |
7671 | 0 | Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; |
7672 | | // RVV can only truncate fp to types half the size of the source. We |
7673 | | // custom-lower f64->f16 rounds via RVV's round-to-odd float |
7674 | | // conversion instruction. |
7675 | 0 | SDLoc DL(Op); |
7676 | 0 | MVT VT = Op.getSimpleValueType(); |
7677 | |
|
7678 | 0 | assert(VT.isVector() && "Unexpected type for vector truncate lowering"); |
7679 | | |
7680 | 0 | SDValue Src = Op.getOperand(0); |
7681 | 0 | MVT SrcVT = Src.getSimpleValueType(); |
7682 | |
|
7683 | 0 | bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 || |
7684 | 0 | SrcVT.getVectorElementType() != MVT::f16); |
7685 | 0 | bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 || |
7686 | 0 | SrcVT.getVectorElementType() != MVT::f64); |
7687 | |
|
7688 | 0 | bool IsDirectConv = IsDirectExtend || IsDirectTrunc; |
7689 | | |
7690 | | // Prepare any fixed-length vector operands. |
7691 | 0 | MVT ContainerVT = VT; |
7692 | 0 | SDValue Mask, VL; |
7693 | 0 | if (IsVP) { |
7694 | 0 | Mask = Op.getOperand(1); |
7695 | 0 | VL = Op.getOperand(2); |
7696 | 0 | } |
7697 | 0 | if (VT.isFixedLengthVector()) { |
7698 | 0 | MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); |
7699 | 0 | ContainerVT = |
7700 | 0 | SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); |
7701 | 0 | Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); |
7702 | 0 | if (IsVP) { |
7703 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
7704 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
7705 | 0 | } |
7706 | 0 | } |
7707 | |
|
7708 | 0 | if (!IsVP) |
7709 | 0 | std::tie(Mask, VL) = |
7710 | 0 | getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); |
7711 | |
|
7712 | 0 | unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; |
7713 | |
|
7714 | 0 | if (IsDirectConv) { |
7715 | 0 | Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL); |
7716 | 0 | if (VT.isFixedLengthVector()) |
7717 | 0 | Src = convertFromScalableVector(VT, Src, DAG, Subtarget); |
7718 | 0 | return Src; |
7719 | 0 | } |
7720 | | |
7721 | 0 | unsigned InterConvOpc = |
7722 | 0 | IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; |
7723 | |
|
7724 | 0 | MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); |
7725 | 0 | SDValue IntermediateConv = |
7726 | 0 | DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL); |
7727 | 0 | SDValue Result = |
7728 | 0 | DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL); |
7729 | 0 | if (VT.isFixedLengthVector()) |
7730 | 0 | return convertFromScalableVector(VT, Result, DAG, Subtarget); |
7731 | 0 | return Result; |
7732 | 0 | } |
7733 | | |
7734 | | // Given a scalable vector type and an index into it, returns the type for the |
7735 | | // smallest subvector that the index fits in. This can be used to reduce LMUL |
7736 | | // for operations like vslidedown. |
7737 | | // |
7738 | | // E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32. |
7739 | | static std::optional<MVT> |
7740 | | getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, |
7741 | 0 | const RISCVSubtarget &Subtarget) { |
7742 | 0 | assert(VecVT.isScalableVector()); |
7743 | 0 | const unsigned EltSize = VecVT.getScalarSizeInBits(); |
7744 | 0 | const unsigned VectorBitsMin = Subtarget.getRealMinVLen(); |
7745 | 0 | const unsigned MinVLMAX = VectorBitsMin / EltSize; |
7746 | 0 | MVT SmallerVT; |
7747 | 0 | if (MaxIdx < MinVLMAX) |
7748 | 0 | SmallerVT = getLMUL1VT(VecVT); |
7749 | 0 | else if (MaxIdx < MinVLMAX * 2) |
7750 | 0 | SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT(); |
7751 | 0 | else if (MaxIdx < MinVLMAX * 4) |
7752 | 0 | SmallerVT = getLMUL1VT(VecVT) |
7753 | 0 | .getDoubleNumVectorElementsVT() |
7754 | 0 | .getDoubleNumVectorElementsVT(); |
7755 | 0 | if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT)) |
7756 | 0 | return std::nullopt; |
7757 | 0 | return SmallerVT; |
7758 | 0 | } |
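
A worked model of the calculation above (not from the source; the helper is invented): with a guaranteed minimum VLEN of 128 bits and 32-bit elements, an LMUL1 register holds at least 128/32 = 4 elements, so index 3 can be handled at LMUL1 (the nxv2i32 case in the comment), index 5 needs LMUL2, and anything at or beyond four LMUL1 registers is left alone.

#include <cassert>
#include <optional>

// Smallest LMUL (1, 2 or 4) whose guaranteed element count covers MaxIdx.
static std::optional<unsigned> smallestLMULForIndex(unsigned MaxIdx,
                                                    unsigned MinVLenBits,
                                                    unsigned EltSizeBits) {
  unsigned MinVLMAX = MinVLenBits / EltSizeBits; // elements per LMUL1 register
  if (MaxIdx < MinVLMAX)
    return 1;
  if (MaxIdx < MinVLMAX * 2)
    return 2;
  if (MaxIdx < MinVLMAX * 4)
    return 4;
  return std::nullopt; // no profitable shrinking, keep the original type
}

int main() {
  assert(smallestLMULForIndex(3, 128, 32) == 1u);  // Zvl128b, nxv4i32, idx 3
  assert(smallestLMULForIndex(5, 128, 32) == 2u);
  assert(smallestLMULForIndex(20, 128, 32) == std::nullopt);
  return 0;
}
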
7759 | | |
7760 | | // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the |
7761 | | // first position of a vector, and that vector is slid up to the insert index. |
7762 | | // By limiting the active vector length to index+1 and merging with the |
7763 | | // original vector (with an undisturbed tail policy for elements >= VL), we |
7764 | | // achieve the desired result of leaving all elements untouched except the one |
7765 | | // at VL-1, which is replaced with the desired value. |
7766 | | SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
7767 | 0 | SelectionDAG &DAG) const { |
7768 | 0 | SDLoc DL(Op); |
7769 | 0 | MVT VecVT = Op.getSimpleValueType(); |
7770 | 0 | SDValue Vec = Op.getOperand(0); |
7771 | 0 | SDValue Val = Op.getOperand(1); |
7772 | 0 | SDValue Idx = Op.getOperand(2); |
7773 | |
|
7774 | 0 | if (VecVT.getVectorElementType() == MVT::i1) { |
7775 | | // FIXME: For now we just promote to an i8 vector and insert into that, |
7776 | | // but this is probably not optimal. |
7777 | 0 | MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); |
7778 | 0 | Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); |
7779 | 0 | Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx); |
7780 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec); |
7781 | 0 | } |
7782 | | |
7783 | 0 | MVT ContainerVT = VecVT; |
7784 | | // If the operand is a fixed-length vector, convert to a scalable one. |
7785 | 0 | if (VecVT.isFixedLengthVector()) { |
7786 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
7787 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
7788 | 0 | } |
7789 | | |
7790 | | // If we know the index we're going to insert at, we can shrink Vec so that |
7791 | | // we're performing the scalar inserts and slideup on a smaller LMUL. |
7792 | 0 | MVT OrigContainerVT = ContainerVT; |
7793 | 0 | SDValue OrigVec = Vec; |
7794 | 0 | SDValue AlignedIdx; |
7795 | 0 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) { |
7796 | 0 | const unsigned OrigIdx = IdxC->getZExtValue(); |
7797 | | // Do we know an upper bound on LMUL? |
7798 | 0 | if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx, |
7799 | 0 | DL, DAG, Subtarget)) { |
7800 | 0 | ContainerVT = *ShrunkVT; |
7801 | 0 | AlignedIdx = DAG.getVectorIdxConstant(0, DL); |
7802 | 0 | } |
7803 | | |
7804 | | // If we're compiling for an exact VLEN value, we can always perform |
7805 | | // the insert in m1 as we can determine the register corresponding to |
7806 | | // the index in the register group. |
7807 | 0 | const unsigned MinVLen = Subtarget.getRealMinVLen(); |
7808 | 0 | const unsigned MaxVLen = Subtarget.getRealMaxVLen(); |
7809 | 0 | const MVT M1VT = getLMUL1VT(ContainerVT); |
7810 | 0 | if (MinVLen == MaxVLen && ContainerVT.bitsGT(M1VT)) { |
7811 | 0 | EVT ElemVT = VecVT.getVectorElementType(); |
7812 | 0 | unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); |
7813 | 0 | unsigned RemIdx = OrigIdx % ElemsPerVReg; |
7814 | 0 | unsigned SubRegIdx = OrigIdx / ElemsPerVReg; |
7815 | 0 | unsigned ExtractIdx = |
7816 | 0 | SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); |
7817 | 0 | AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL); |
7818 | 0 | Idx = DAG.getVectorIdxConstant(RemIdx, DL); |
7819 | 0 | ContainerVT = M1VT; |
7820 | 0 | } |
7821 | |
|
7822 | 0 | if (AlignedIdx) |
7823 | 0 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, |
7824 | 0 | AlignedIdx); |
7825 | 0 | } |
7826 | |
|
7827 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
7828 | |
|
7829 | 0 | bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; |
7830 | | // Even i64-element vectors on RV32 can be lowered without scalar |
7831 | | // legalization if the most-significant 32 bits of the value are not affected |
7832 | | // by the sign-extension of the lower 32 bits. |
7833 | | // TODO: We could also catch sign extensions of a 32-bit value. |
7834 | 0 | if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { |
7835 | 0 | const auto *CVal = cast<ConstantSDNode>(Val); |
7836 | 0 | if (isInt<32>(CVal->getSExtValue())) { |
7837 | 0 | IsLegalInsert = true; |
7838 | 0 | Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); |
7839 | 0 | } |
7840 | 0 | } |
7841 | |
|
7842 | 0 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
7843 | |
|
7844 | 0 | SDValue ValInVec; |
7845 | |
|
7846 | 0 | if (IsLegalInsert) { |
7847 | 0 | unsigned Opc = |
7848 | 0 | VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; |
7849 | 0 | if (isNullConstant(Idx)) { |
7850 | 0 | if (!VecVT.isFloatingPoint()) |
7851 | 0 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val); |
7852 | 0 | Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL); |
7853 | |
|
7854 | 0 | if (AlignedIdx) |
7855 | 0 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, |
7856 | 0 | Vec, AlignedIdx); |
7857 | 0 | if (!VecVT.isFixedLengthVector()) |
7858 | 0 | return Vec; |
7859 | 0 | return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); |
7860 | 0 | } |
7861 | 0 | ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget); |
7862 | 0 | } else { |
7863 | | // On RV32, i64-element vectors must be specially handled to place the |
7864 | | // value at element 0, by using two vslide1down instructions in sequence on |
7865 | | // the i32 split lo/hi value. Use an equivalently-sized i32 vector for |
7866 | | // this. |
7867 | 0 | SDValue ValLo, ValHi; |
7868 | 0 | std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32); |
7869 | 0 | MVT I32ContainerVT = |
7870 | 0 | MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); |
7871 | 0 | SDValue I32Mask = |
7872 | 0 | getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; |
7873 | | // Limit the active VL to two. |
7874 | 0 | SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); |
7875 | | // If the Idx is 0 we can insert directly into the vector. |
7876 | 0 | if (isNullConstant(Idx)) { |
7877 | | // First slide in the lo value, then the hi value above it. We use slide1down |
7878 | | // to avoid the register group overlap constraint of vslide1up. |
7879 | 0 | ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, |
7880 | 0 | Vec, Vec, ValLo, I32Mask, InsertI64VL); |
7881 | | // If the source vector is undef don't pass along the tail elements from |
7882 | | // the previous slide1down. |
7883 | 0 | SDValue Tail = Vec.isUndef() ? Vec : ValInVec; |
7884 | 0 | ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, |
7885 | 0 | Tail, ValInVec, ValHi, I32Mask, InsertI64VL); |
7886 | | // Bitcast back to the right container type. |
7887 | 0 | ValInVec = DAG.getBitcast(ContainerVT, ValInVec); |
7888 | |
|
7889 | 0 | if (AlignedIdx) |
7890 | 0 | ValInVec = |
7891 | 0 | DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, |
7892 | 0 | ValInVec, AlignedIdx); |
7893 | 0 | if (!VecVT.isFixedLengthVector()) |
7894 | 0 | return ValInVec; |
7895 | 0 | return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget); |
7896 | 0 | } |
7897 | | |
7898 | | // First slide in the lo value, then the hi value above it. We use slide1down |
7899 | | // to avoid the register group overlap constraint of vslide1up. |
7900 | 0 | ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, |
7901 | 0 | DAG.getUNDEF(I32ContainerVT), |
7902 | 0 | DAG.getUNDEF(I32ContainerVT), ValLo, |
7903 | 0 | I32Mask, InsertI64VL); |
7904 | 0 | ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, |
7905 | 0 | DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi, |
7906 | 0 | I32Mask, InsertI64VL); |
7907 | | // Bitcast back to the right container type. |
7908 | 0 | ValInVec = DAG.getBitcast(ContainerVT, ValInVec); |
7909 | 0 | } |
7910 | | |
7911 | | // Now that the value is in a vector, slide it into position. |
7912 | 0 | SDValue InsertVL = |
7913 | 0 | DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); |
7914 | | |
7915 | | // Use tail agnostic policy if Idx is the last index of Vec. |
7916 | 0 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; |
7917 | 0 | if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) && |
7918 | 0 | Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements()) |
7919 | 0 | Policy = RISCVII::TAIL_AGNOSTIC; |
7920 | 0 | SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, |
7921 | 0 | Idx, Mask, InsertVL, Policy); |
7922 | |
|
7923 | 0 | if (AlignedIdx) |
7924 | 0 | Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, |
7925 | 0 | Slideup, AlignedIdx); |
7926 | 0 | if (!VecVT.isFixedLengthVector()) |
7927 | 0 | return Slideup; |
7928 | 0 | return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); |
7929 | 0 | } |
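
A scalar model (not from the source; the names are invented) of the strategy described above: the value is put into element 0 of a temporary vector, VL is clamped to Idx + 1, and a tail-undisturbed vslideup merges it so that only element Idx of the destination changes.

#include <cassert>
#include <cstdint>
#include <vector>

// vslideup.vx with a tail-undisturbed policy: for Offset <= i < VL the result
// takes Src[i - Offset]; every other element keeps the destination's value.
static std::vector<uint64_t> vslideupModel(std::vector<uint64_t> Dest,
                                           const std::vector<uint64_t> &Src,
                                           unsigned Offset, unsigned VL) {
  for (unsigned I = Offset; I < VL && I < Dest.size(); ++I)
    Dest[I] = Src[I - Offset];
  return Dest;
}

static std::vector<uint64_t> insertElt(const std::vector<uint64_t> &Vec,
                                       uint64_t Val, unsigned Idx) {
  std::vector<uint64_t> ValInVec(Vec.size(), 0);
  ValInVec[0] = Val;                                 // the vmv.s.x step
  return vslideupModel(Vec, ValInVec, Idx, Idx + 1); // VL limited to Idx + 1
}

int main() {
  std::vector<uint64_t> V = {10, 11, 12, 13};
  std::vector<uint64_t> Expected = {10, 11, 99, 13};
  assert(insertElt(V, 99, 2) == Expected);
  return 0;
}
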
7930 | | |
7931 | | // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then |
7932 | | // extract the first element: (extractelt (slidedown vec, idx), 0). For integer |
7933 | | // types this is done using VMV_X_S to allow us to glean information about the |
7934 | | // sign bits of the result. |
7935 | | SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, |
7936 | 0 | SelectionDAG &DAG) const { |
7937 | 0 | SDLoc DL(Op); |
7938 | 0 | SDValue Idx = Op.getOperand(1); |
7939 | 0 | SDValue Vec = Op.getOperand(0); |
7940 | 0 | EVT EltVT = Op.getValueType(); |
7941 | 0 | MVT VecVT = Vec.getSimpleValueType(); |
7942 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
7943 | |
|
7944 | 0 | if (VecVT.getVectorElementType() == MVT::i1) { |
7945 | | // Use vfirst.m to extract the first bit. |
7946 | 0 | if (isNullConstant(Idx)) { |
7947 | 0 | MVT ContainerVT = VecVT; |
7948 | 0 | if (VecVT.isFixedLengthVector()) { |
7949 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
7950 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
7951 | 0 | } |
7952 | 0 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
7953 | 0 | SDValue Vfirst = |
7954 | 0 | DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL); |
7955 | 0 | SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst, |
7956 | 0 | DAG.getConstant(0, DL, XLenVT), ISD::SETEQ); |
7957 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); |
7958 | 0 | } |
7959 | 0 | if (VecVT.isFixedLengthVector()) { |
7960 | 0 | unsigned NumElts = VecVT.getVectorNumElements(); |
7961 | 0 | if (NumElts >= 8) { |
7962 | 0 | MVT WideEltVT; |
7963 | 0 | unsigned WidenVecLen; |
7964 | 0 | SDValue ExtractElementIdx; |
7965 | 0 | SDValue ExtractBitIdx; |
7966 | 0 | unsigned MaxEEW = Subtarget.getELen(); |
7967 | 0 | MVT LargestEltVT = MVT::getIntegerVT( |
7968 | 0 | std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); |
7969 | 0 | if (NumElts <= LargestEltVT.getSizeInBits()) { |
7970 | 0 | assert(isPowerOf2_32(NumElts) && |
7971 | 0 | "the number of elements should be power of 2"); |
7972 | 0 | WideEltVT = MVT::getIntegerVT(NumElts); |
7973 | 0 | WidenVecLen = 1; |
7974 | 0 | ExtractElementIdx = DAG.getConstant(0, DL, XLenVT); |
7975 | 0 | ExtractBitIdx = Idx; |
7976 | 0 | } else { |
7977 | 0 | WideEltVT = LargestEltVT; |
7978 | 0 | WidenVecLen = NumElts / WideEltVT.getSizeInBits(); |
7979 | | // extract element index = index / element width |
7980 | 0 | ExtractElementIdx = DAG.getNode( |
7981 | 0 | ISD::SRL, DL, XLenVT, Idx, |
7982 | 0 | DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT)); |
7983 | | // mask bit index = index % element width |
7984 | 0 | ExtractBitIdx = DAG.getNode( |
7985 | 0 | ISD::AND, DL, XLenVT, Idx, |
7986 | 0 | DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT)); |
7987 | 0 | } |
7988 | 0 | MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen); |
7989 | 0 | Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec); |
7990 | 0 | SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, |
7991 | 0 | Vec, ExtractElementIdx); |
7992 | | // Extract the bit from GPR. |
7993 | 0 | SDValue ShiftRight = |
7994 | 0 | DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx); |
7995 | 0 | SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, |
7996 | 0 | DAG.getConstant(1, DL, XLenVT)); |
7997 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res); |
7998 | 0 | } |
7999 | 0 | } |
8000 | | // Otherwise, promote to an i8 vector and extract from that. |
8001 | 0 | MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); |
8002 | 0 | Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); |
8003 | 0 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); |
8004 | 0 | } |
8005 | | |
8006 | | // If this is a fixed vector, we need to convert it to a scalable vector. |
8007 | 0 | MVT ContainerVT = VecVT; |
8008 | 0 | if (VecVT.isFixedLengthVector()) { |
8009 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
8010 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
8011 | 0 | } |
8012 | | |
8013 | | // If we're compiling for an exact VLEN value and we have a known |
8014 | | // constant index, we can always perform the extract in m1 (or |
8015 | | // smaller) as we can determine the register corresponding to |
8016 | | // the index in the register group. |
8017 | 0 | const unsigned MinVLen = Subtarget.getRealMinVLen(); |
8018 | 0 | const unsigned MaxVLen = Subtarget.getRealMaxVLen(); |
8019 | 0 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx); |
8020 | 0 | IdxC && MinVLen == MaxVLen && |
8021 | 0 | VecVT.getSizeInBits().getKnownMinValue() > MinVLen) { |
8022 | 0 | MVT M1VT = getLMUL1VT(ContainerVT); |
8023 | 0 | unsigned OrigIdx = IdxC->getZExtValue(); |
8024 | 0 | EVT ElemVT = VecVT.getVectorElementType(); |
8025 | 0 | unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); |
8026 | 0 | unsigned RemIdx = OrigIdx % ElemsPerVReg; |
8027 | 0 | unsigned SubRegIdx = OrigIdx / ElemsPerVReg; |
8028 | 0 | unsigned ExtractIdx = |
8029 | 0 | SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue(); |
8030 | 0 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, |
8031 | 0 | DAG.getVectorIdxConstant(ExtractIdx, DL)); |
8032 | 0 | Idx = DAG.getVectorIdxConstant(RemIdx, DL); |
8033 | 0 | ContainerVT = M1VT; |
8034 | 0 | } |
8035 | | |
8036 | | // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which |
8037 | | // contains our index. |
8038 | 0 | std::optional<uint64_t> MaxIdx; |
8039 | 0 | if (VecVT.isFixedLengthVector()) |
8040 | 0 | MaxIdx = VecVT.getVectorNumElements() - 1; |
8041 | 0 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) |
8042 | 0 | MaxIdx = IdxC->getZExtValue(); |
8043 | 0 | if (MaxIdx) { |
8044 | 0 | if (auto SmallerVT = |
8045 | 0 | getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) { |
8046 | 0 | ContainerVT = *SmallerVT; |
8047 | 0 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, |
8048 | 0 | DAG.getConstant(0, DL, XLenVT)); |
8049 | 0 | } |
8050 | 0 | } |
8051 | | |
8052 | | // If after narrowing, the required slide is still greater than LMUL2, |
8053 | | // fallback to generic expansion and go through the stack. This is done |
8054 | | // for a subtle reason: extracting *all* elements out of a vector is |
8055 | | // widely expected to be linear in vector size, but because vslidedown |
8056 | | // is linear in LMUL, performing N extracts using vslidedown becomes |
8057 | | // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack |
8058 | | // seems to have the same problem (the store is linear in LMUL), but the |
8059 | | // generic expansion *memoizes* the store, and thus for many extracts of |
8060 | | // the same vector we end up with one store and a bunch of loads. |
8061 | | // TODO: We don't have the same code for insert_vector_elt because we |
8062 | | // have BUILD_VECTOR and handle the degenerate case there. Should we |
8063 | | // consider adding an inverse BUILD_VECTOR node? |
8064 | 0 | MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT(); |
8065 | 0 | if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector()) |
8066 | 0 | return SDValue(); |
8067 | | |
8068 | | // If the index is 0, the vector is already in the right position. |
8069 | 0 | if (!isNullConstant(Idx)) { |
8070 | | // Use a VL of 1 to avoid processing more elements than we need. |
8071 | 0 | auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); |
8072 | 0 | Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, |
8073 | 0 | DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); |
8074 | 0 | } |
8075 | |
|
8076 | 0 | if (!EltVT.isInteger()) { |
8077 | | // Floating-point extracts are handled in TableGen. |
8078 | 0 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, |
8079 | 0 | DAG.getConstant(0, DL, XLenVT)); |
8080 | 0 | } |
8081 | | |
8082 | 0 | SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); |
8083 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); |
8084 | 0 | } |
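
A standalone sketch (not from the source) of the mask-bit extraction trick above: the i1 vector is reinterpreted as a vector of wide integers, the containing word is selected with Idx / WordBits and the bit with Idx % WordBits.

#include <cassert>
#include <cstdint>
#include <vector>

// Extract mask bit Idx from a bitmask stored as 64-bit words, mirroring the
// (and (srl (extractelt WideVec, Idx / 64), Idx % 64), 1) sequence above.
static bool extractMaskBit(const std::vector<uint64_t> &Words, unsigned Idx) {
  uint64_t Word = Words[Idx / 64]; // extract element index = index / width
  return (Word >> (Idx % 64)) & 1; // mask bit index       = index % width
}

int main() {
  std::vector<uint64_t> Words = {0, 0};
  Words[1] |= 1ULL << 3; // set mask bit 67
  assert(extractMaskBit(Words, 67));
  assert(!extractMaskBit(Words, 66));
  return 0;
}
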
8085 | | |
8086 | | // Some RVV intrinsics may claim that they want an integer operand to be |
8087 | | // promoted or expanded. |
8088 | | static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, |
8089 | 0 | const RISCVSubtarget &Subtarget) { |
8090 | 0 | assert((Op.getOpcode() == ISD::INTRINSIC_VOID || |
8091 | 0 | Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
8092 | 0 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && |
8093 | 0 | "Unexpected opcode"); |
8094 | | |
8095 | 0 | if (!Subtarget.hasVInstructions()) |
8096 | 0 | return SDValue(); |
8097 | | |
8098 | 0 | bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID || |
8099 | 0 | Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; |
8100 | 0 | unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); |
8101 | |
|
8102 | 0 | SDLoc DL(Op); |
8103 | |
|
8104 | 0 | const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = |
8105 | 0 | RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); |
8106 | 0 | if (!II || !II->hasScalarOperand()) |
8107 | 0 | return SDValue(); |
8108 | | |
8109 | 0 | unsigned SplatOp = II->ScalarOperand + 1 + HasChain; |
8110 | 0 | assert(SplatOp < Op.getNumOperands()); |
8111 | | |
8112 | 0 | SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); |
8113 | 0 | SDValue &ScalarOp = Operands[SplatOp]; |
8114 | 0 | MVT OpVT = ScalarOp.getSimpleValueType(); |
8115 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
8116 | | |
8117 | | // If this isn't a scalar, or its type is XLenVT we're done. |
8118 | 0 | if (!OpVT.isScalarInteger() || OpVT == XLenVT) |
8119 | 0 | return SDValue(); |
8120 | | |
8121 | | // Simplest case is that the operand needs to be promoted to XLenVT. |
8122 | 0 | if (OpVT.bitsLT(XLenVT)) { |
8123 | | // If the operand is a constant, sign extend to increase our chances |
8124 | | // of being able to use a .vi instruction. ANY_EXTEND would become a |
8125 | | // zero extend and the simm5 check in isel would fail. |
8126 | | // FIXME: Should we ignore the upper bits in isel instead? |
8127 | 0 | unsigned ExtOpc = |
8128 | 0 | isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; |
8129 | 0 | ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); |
8130 | 0 | return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); |
8131 | 0 | } |
8132 | | |
8133 | | // Use the previous operand to get the vXi64 VT. The result might be a mask |
8134 | | // VT for compares. Using the previous operand assumes that the previous |
8135 | | // operand will never have a smaller element size than a scalar operand and |
8136 | | // that a widening operation never uses SEW=64. |
8137 | | // NOTE: If this fails the below assert, we can probably just find the |
8138 | | // element count from any operand or result and use it to construct the VT. |
8139 | 0 | assert(II->ScalarOperand > 0 && "Unexpected splat operand!"); |
8140 | 0 | MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType(); |
8141 | | |
8142 | | // The more complex case is when the scalar is larger than XLenVT. |
8143 | 0 | assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && |
8144 | 0 | VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); |
8145 | | |
8146 | | // If this is a sign-extended 32-bit value, we can truncate it and rely on the |
8147 | | // instruction to sign-extend since SEW>XLEN. |
8148 | 0 | if (DAG.ComputeNumSignBits(ScalarOp) > 32) { |
8149 | 0 | ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp); |
8150 | 0 | return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); |
8151 | 0 | } |
8152 | | |
8153 | 0 | switch (IntNo) { |
8154 | 0 | case Intrinsic::riscv_vslide1up: |
8155 | 0 | case Intrinsic::riscv_vslide1down: |
8156 | 0 | case Intrinsic::riscv_vslide1up_mask: |
8157 | 0 | case Intrinsic::riscv_vslide1down_mask: { |
8158 | | // We need to special case these when the scalar is larger than XLen. |
8159 | 0 | unsigned NumOps = Op.getNumOperands(); |
8160 | 0 | bool IsMasked = NumOps == 7; |
8161 | | |
8162 | | // Convert the vector source to the equivalent nxvXi32 vector. |
8163 | 0 | MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); |
8164 | 0 | SDValue Vec = DAG.getBitcast(I32VT, Operands[2]); |
8165 | 0 | SDValue ScalarLo, ScalarHi; |
8166 | 0 | std::tie(ScalarLo, ScalarHi) = |
8167 | 0 | DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32); |
8168 | | |
8169 | | // Double the VL since we halved SEW. |
8170 | 0 | SDValue AVL = getVLOperand(Op); |
8171 | 0 | SDValue I32VL; |
8172 | | |
8173 | | // Optimize for constant AVL |
8174 | 0 | if (isa<ConstantSDNode>(AVL)) { |
8175 | 0 | const auto [MinVLMAX, MaxVLMAX] = |
8176 | 0 | RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget); |
8177 | |
|
8178 | 0 | uint64_t AVLInt = AVL->getAsZExtVal(); |
8179 | 0 | if (AVLInt <= MinVLMAX) { |
8180 | 0 | I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); |
8181 | 0 | } else if (AVLInt >= 2 * MaxVLMAX) { |
8182 | | // Just set vl to VLMAX in this situation |
8183 | 0 | RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT); |
8184 | 0 | SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); |
8185 | 0 | unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits()); |
8186 | 0 | SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); |
8187 | 0 | SDValue SETVLMAX = DAG.getTargetConstant( |
8188 | 0 | Intrinsic::riscv_vsetvlimax, DL, MVT::i32); |
8189 | 0 | I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW, |
8190 | 0 | LMUL); |
8191 | 0 | } else { |
8192 | | // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl |
8193 | | // is related to the hardware implementation. |
8194 | | // So let the following code handle it. |
8195 | 0 | } |
8196 | 0 | } |
8197 | 0 | if (!I32VL) { |
8198 | 0 | RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT); |
8199 | 0 | SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); |
8200 | 0 | unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits()); |
8201 | 0 | SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); |
8202 | 0 | SDValue SETVL = |
8203 | 0 | DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32); |
8204 | | // Use a vsetvli instruction to get the actual vector length, which depends |
8205 | | // on the hardware implementation. |
8206 | 0 | SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL, |
8207 | 0 | SEW, LMUL); |
8208 | 0 | I32VL = |
8209 | 0 | DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); |
8210 | 0 | } |
8211 | |
|
8212 | 0 | SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG); |
8213 | | |
8214 | | // Shift the two scalar parts in using SEW=32 slide1up/slide1down |
8215 | | // instructions. |
8216 | 0 | SDValue Passthru; |
8217 | 0 | if (IsMasked) |
8218 | 0 | Passthru = DAG.getUNDEF(I32VT); |
8219 | 0 | else |
8220 | 0 | Passthru = DAG.getBitcast(I32VT, Operands[1]); |
8221 | |
|
8222 | 0 | if (IntNo == Intrinsic::riscv_vslide1up || |
8223 | 0 | IntNo == Intrinsic::riscv_vslide1up_mask) { |
8224 | 0 | Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, |
8225 | 0 | ScalarHi, I32Mask, I32VL); |
8226 | 0 | Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, |
8227 | 0 | ScalarLo, I32Mask, I32VL); |
8228 | 0 | } else { |
8229 | 0 | Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, |
8230 | 0 | ScalarLo, I32Mask, I32VL); |
8231 | 0 | Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, |
8232 | 0 | ScalarHi, I32Mask, I32VL); |
8233 | 0 | } |
8234 | | |
8235 | | // Convert back to nxvXi64. |
8236 | 0 | Vec = DAG.getBitcast(VT, Vec); |
8237 | |
|
8238 | 0 | if (!IsMasked) |
8239 | 0 | return Vec; |
8240 | | // Apply mask after the operation. |
8241 | 0 | SDValue Mask = Operands[NumOps - 3]; |
8242 | 0 | SDValue MaskedOff = Operands[1]; |
8243 | | // Assume Policy operand is the last operand. |
8244 | 0 | uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal(); |
8245 | | // We don't need to select maskedoff if it's undef. |
8246 | 0 | if (MaskedOff.isUndef()) |
8247 | 0 | return Vec; |
8248 | | // TAMU (tail agnostic, mask undisturbed)
8249 | 0 | if (Policy == RISCVII::TAIL_AGNOSTIC) |
8250 | 0 | return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff, |
8251 | 0 | DAG.getUNDEF(VT), AVL); |
8252 | | // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. |
8253 | | // It's fine because vmerge does not care about mask policy.
8254 | 0 | return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff, |
8255 | 0 | MaskedOff, AVL); |
8256 | 0 | } |
8257 | 0 | } |
8258 | | |
8259 | | // We need to convert the scalar to a splat vector. |
8260 | 0 | SDValue VL = getVLOperand(Op); |
8261 | 0 | assert(VL.getValueType() == XLenVT); |
8262 | 0 | ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG); |
8263 | 0 | return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); |
8264 | 0 | } |
8265 | | |
8266 | | // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support |
8267 | | // scalable vector llvm.get.vector.length for now. |
8268 | | // |
8269 | | // We need to convert from a scalable VF to a vsetvli with VLMax equal to |
8270 | | // (vscale * VF). The vscale and VF are independent of element width. We use |
8271 | | // SEW=8 for the vsetvli because it is the only element width that supports all |
8272 | | // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8273 | | // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8274 | | // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different |
8275 | | // SEW and LMUL are better for the surrounding vector instructions. |
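 | | // As an illustrative example: with VF=2 and RVVBitsPerBlock=64, LMul1VF is
 | | // 8, so the computation below picks the fractional LMUL 1/4 (mf4); with
 | | // SEW=8 that gives VLMax = VLEN/8 * 1/4 = vscale * 2, i.e. (vscale * VF).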
8276 | | static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, |
8277 | 0 | const RISCVSubtarget &Subtarget) { |
8278 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
8279 | | |
8280 | | // The smallest LMUL is only valid for the smallest element width. |
8281 | 0 | const unsigned ElementWidth = 8; |
8282 | | |
8283 | | // Determine the VF that corresponds to LMUL 1 for ElementWidth. |
8284 | 0 | unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth; |
8285 | | // We don't support VF==1 with ELEN==32. |
8286 | 0 | unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELen(); |
8287 | |
|
8288 | 0 | unsigned VF = N->getConstantOperandVal(2); |
8289 | 0 | assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) && |
8290 | 0 | "Unexpected VF"); |
8291 | 0 | (void)MinVF; |
8292 | |
|
8293 | 0 | bool Fractional = VF < LMul1VF; |
8294 | 0 | unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF; |
8295 | 0 | unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional); |
8296 | 0 | unsigned VSEW = RISCVVType::encodeSEW(ElementWidth); |
8297 | |
|
8298 | 0 | SDLoc DL(N); |
8299 | |
|
8300 | 0 | SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT); |
8301 | 0 | SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT); |
8302 | |
|
8303 | 0 | SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1)); |
8304 | |
|
8305 | 0 | SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT); |
8306 | 0 | SDValue Res = |
8307 | 0 | DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul); |
8308 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res); |
8309 | 0 | } |
8310 | | |
8311 | | static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG, |
8312 | 0 | SmallVector<SDValue> &Ops) { |
8313 | 0 | SDLoc DL(Op); |
8314 | |
|
8315 | 0 | const RISCVSubtarget &Subtarget = |
8316 | 0 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
8317 | 0 | for (const SDValue &V : Op->op_values()) { |
8318 | 0 | EVT ValType = V.getValueType(); |
8319 | 0 | if (ValType.isScalableVector() && ValType.isFloatingPoint()) { |
8320 | 0 | MVT InterimIVT = |
8321 | 0 | MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()), |
8322 | 0 | ValType.getVectorElementCount()); |
8323 | 0 | Ops.push_back(DAG.getBitcast(InterimIVT, V)); |
8324 | 0 | } else if (ValType.isFixedLengthVector()) { |
8325 | 0 | MVT OpContainerVT = getContainerForFixedLengthVector( |
8326 | 0 | DAG, V.getSimpleValueType(), Subtarget); |
8327 | 0 | Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget)); |
8328 | 0 | } else |
8329 | 0 | Ops.push_back(V); |
8330 | 0 | } |
8331 | 0 | } |
8332 | | |
8333 | | // LMUL * VLEN should be greater than or equal to EGS * SEW |
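 | | // (Since LMUL is VT's known-minimum size divided by RVVBitsPerBlock, the
 | | // check below is the same inequality multiplied through by the minimum
 | | // VLEN: (MinVLen * VTSize) / RVVBitsPerBlock >= EGS * SEW.)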
8334 | | static inline bool isValidEGW(int EGS, EVT VT, |
8335 | 0 | const RISCVSubtarget &Subtarget) { |
8336 | 0 | return (Subtarget.getRealMinVLen() * |
8337 | 0 | VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >= |
8338 | 0 | EGS * VT.getScalarSizeInBits(); |
8339 | 0 | } |
8340 | | |
8341 | | SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
8342 | 0 | SelectionDAG &DAG) const { |
8343 | 0 | unsigned IntNo = Op.getConstantOperandVal(0); |
8344 | 0 | SDLoc DL(Op); |
8345 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
8346 | |
|
8347 | 0 | switch (IntNo) { |
8348 | 0 | default: |
8349 | 0 | break; // Don't custom lower most intrinsics. |
8350 | 0 | case Intrinsic::thread_pointer: { |
8351 | 0 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
8352 | 0 | return DAG.getRegister(RISCV::X4, PtrVT); |
8353 | 0 | } |
8354 | 0 | case Intrinsic::riscv_orc_b: |
8355 | 0 | case Intrinsic::riscv_brev8: |
8356 | 0 | case Intrinsic::riscv_sha256sig0: |
8357 | 0 | case Intrinsic::riscv_sha256sig1: |
8358 | 0 | case Intrinsic::riscv_sha256sum0: |
8359 | 0 | case Intrinsic::riscv_sha256sum1: |
8360 | 0 | case Intrinsic::riscv_sm3p0: |
8361 | 0 | case Intrinsic::riscv_sm3p1: { |
8362 | 0 | unsigned Opc; |
8363 | 0 | switch (IntNo) { |
8364 | 0 | case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; |
8365 | 0 | case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; |
8366 | 0 | case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; |
8367 | 0 | case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; |
8368 | 0 | case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; |
8369 | 0 | case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; |
8370 | 0 | case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; |
8371 | 0 | case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; |
8372 | 0 | } |
8373 | | |
8374 | 0 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8375 | 0 | SDValue NewOp = |
8376 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8377 | 0 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); |
8378 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8379 | 0 | } |
8380 | | |
8381 | 0 | return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); |
8382 | 0 | } |
8383 | 0 | case Intrinsic::riscv_sm4ks: |
8384 | 0 | case Intrinsic::riscv_sm4ed: { |
8385 | 0 | unsigned Opc = |
8386 | 0 | IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; |
8387 | |
|
8388 | 0 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8389 | 0 | SDValue NewOp0 = |
8390 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8391 | 0 | SDValue NewOp1 = |
8392 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); |
8393 | 0 | SDValue Res = |
8394 | 0 | DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3)); |
8395 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8396 | 0 | } |
8397 | | |
8398 | 0 | return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), |
8399 | 0 | Op.getOperand(3)); |
8400 | 0 | } |
8401 | 0 | case Intrinsic::riscv_zip: |
8402 | 0 | case Intrinsic::riscv_unzip: { |
8403 | 0 | unsigned Opc = |
8404 | 0 | IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP; |
8405 | 0 | return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); |
8406 | 0 | } |
8407 | 0 | case Intrinsic::riscv_clmul: |
8408 | 0 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8409 | 0 | SDValue NewOp0 = |
8410 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8411 | 0 | SDValue NewOp1 = |
8412 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); |
8413 | 0 | SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); |
8414 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8415 | 0 | } |
8416 | 0 | return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1), |
8417 | 0 | Op.getOperand(2)); |
8418 | 0 | case Intrinsic::riscv_clmulh: |
8419 | 0 | case Intrinsic::riscv_clmulr: { |
8420 | 0 | unsigned Opc = |
8421 | 0 | IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR; |
8422 | 0 | if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) { |
8423 | 0 | SDValue NewOp0 = |
8424 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1)); |
8425 | 0 | SDValue NewOp1 = |
8426 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2)); |
8427 | 0 | NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, |
8428 | 0 | DAG.getConstant(32, DL, MVT::i64)); |
8429 | 0 | NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, |
8430 | 0 | DAG.getConstant(32, DL, MVT::i64)); |
8431 | 0 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); |
8432 | 0 | Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, |
8433 | 0 | DAG.getConstant(32, DL, MVT::i64)); |
8434 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res); |
8435 | 0 | } |
8436 | | |
8437 | 0 | return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); |
8438 | 0 | } |
8439 | 0 | case Intrinsic::experimental_get_vector_length: |
8440 | 0 | return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); |
8441 | 0 | case Intrinsic::riscv_vmv_x_s: { |
8442 | 0 | SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1)); |
8443 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); |
8444 | 0 | } |
8445 | 0 | case Intrinsic::riscv_vfmv_f_s: |
8446 | 0 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), |
8447 | 0 | Op.getOperand(1), DAG.getConstant(0, DL, XLenVT)); |
8448 | 0 | case Intrinsic::riscv_vmv_v_x: |
8449 | 0 | return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), |
8450 | 0 | Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, |
8451 | 0 | Subtarget); |
8452 | 0 | case Intrinsic::riscv_vfmv_v_f: |
8453 | 0 | return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), |
8454 | 0 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
8455 | 0 | case Intrinsic::riscv_vmv_s_x: { |
8456 | 0 | SDValue Scalar = Op.getOperand(2); |
8457 | |
|
8458 | 0 | if (Scalar.getValueType().bitsLE(XLenVT)) { |
8459 | 0 | Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); |
8460 | 0 | return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(), |
8461 | 0 | Op.getOperand(1), Scalar, Op.getOperand(3)); |
8462 | 0 | } |
8463 | | |
8464 | 0 | assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); |
8465 | | |
8466 | | // This is an i64 value that lives in two scalar registers. We have to |
8467 | | // insert this in a convoluted way. First we build a vXi64 splat containing
8468 | | // the two values that we assemble using some bit math. Next we'll use |
8469 | | // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask |
8470 | | // to merge element 0 from our splat into the source vector. |
8471 | | // FIXME: This is probably not the best way to do this, but it is |
8472 | | // consistent with INSERT_VECTOR_ELT lowering so it is a good starting |
8473 | | // point. |
8474 | | // sw lo, (a0) |
8475 | | // sw hi, 4(a0) |
8476 | | // vlse vX, (a0) |
8477 | | // |
8478 | | // vid.v vVid |
8479 | | // vmseq.vx mMask, vVid, 0 |
8480 | | // vmerge.vvm vDest, vSrc, vVal, mMask |
8481 | 0 | MVT VT = Op.getSimpleValueType(); |
8482 | 0 | SDValue Vec = Op.getOperand(1); |
8483 | 0 | SDValue VL = getVLOperand(Op); |
8484 | |
|
8485 | 0 | SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); |
8486 | 0 | if (Op.getOperand(1).isUndef()) |
8487 | 0 | return SplattedVal; |
8488 | 0 | SDValue SplattedIdx = |
8489 | 0 | DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), |
8490 | 0 | DAG.getConstant(0, DL, MVT::i32), VL); |
8491 | |
|
8492 | 0 | MVT MaskVT = getMaskTypeFor(VT); |
8493 | 0 | SDValue Mask = getAllOnesMask(VT, VL, DL, DAG); |
8494 | 0 | SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); |
8495 | 0 | SDValue SelectCond = |
8496 | 0 | DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, |
8497 | 0 | {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ), |
8498 | 0 | DAG.getUNDEF(MaskVT), Mask, VL}); |
8499 | 0 | return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal, |
8500 | 0 | Vec, DAG.getUNDEF(VT), VL); |
8501 | 0 | } |
8502 | 0 | case Intrinsic::riscv_vfmv_s_f: |
8503 | 0 | return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(), |
8504 | 0 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
8505 | | // EGS * EEW >= 128 bits |
8506 | 0 | case Intrinsic::riscv_vaesdf_vv: |
8507 | 0 | case Intrinsic::riscv_vaesdf_vs: |
8508 | 0 | case Intrinsic::riscv_vaesdm_vv: |
8509 | 0 | case Intrinsic::riscv_vaesdm_vs: |
8510 | 0 | case Intrinsic::riscv_vaesef_vv: |
8511 | 0 | case Intrinsic::riscv_vaesef_vs: |
8512 | 0 | case Intrinsic::riscv_vaesem_vv: |
8513 | 0 | case Intrinsic::riscv_vaesem_vs: |
8514 | 0 | case Intrinsic::riscv_vaeskf1: |
8515 | 0 | case Intrinsic::riscv_vaeskf2: |
8516 | 0 | case Intrinsic::riscv_vaesz_vs: |
8517 | 0 | case Intrinsic::riscv_vsm4k: |
8518 | 0 | case Intrinsic::riscv_vsm4r_vv: |
8519 | 0 | case Intrinsic::riscv_vsm4r_vs: { |
8520 | 0 | if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || |
8521 | 0 | !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || |
8522 | 0 | !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) |
8523 | 0 | report_fatal_error("EGW should be greater than or equal to 4 * SEW."); |
8524 | 0 | return Op; |
8525 | 0 | } |
8526 | | // EGS * EEW >= 256 bits |
8527 | 0 | case Intrinsic::riscv_vsm3c: |
8528 | 0 | case Intrinsic::riscv_vsm3me: { |
8529 | 0 | if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) || |
8530 | 0 | !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget)) |
8531 | 0 | report_fatal_error("EGW should be greater than or equal to 8 * SEW."); |
8532 | 0 | return Op; |
8533 | 0 | } |
8534 | | // zvknha(SEW=32)/zvknhb(SEW=[32|64]) |
8535 | 0 | case Intrinsic::riscv_vsha2ch: |
8536 | 0 | case Intrinsic::riscv_vsha2cl: |
8537 | 0 | case Intrinsic::riscv_vsha2ms: { |
8538 | 0 | if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 && |
8539 | 0 | !Subtarget.hasStdExtZvknhb()) |
8540 | 0 | report_fatal_error("SEW=64 needs Zvknhb to be enabled."); |
8541 | 0 | if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) || |
8542 | 0 | !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) || |
8543 | 0 | !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget)) |
8544 | 0 | report_fatal_error("EGW should be greater than or equal to 4 * SEW."); |
8545 | 0 | return Op; |
8546 | 0 | } |
8547 | 0 | case Intrinsic::riscv_sf_vc_v_x: |
8548 | 0 | case Intrinsic::riscv_sf_vc_v_i: |
8549 | 0 | case Intrinsic::riscv_sf_vc_v_xv: |
8550 | 0 | case Intrinsic::riscv_sf_vc_v_iv: |
8551 | 0 | case Intrinsic::riscv_sf_vc_v_vv: |
8552 | 0 | case Intrinsic::riscv_sf_vc_v_fv: |
8553 | 0 | case Intrinsic::riscv_sf_vc_v_xvv: |
8554 | 0 | case Intrinsic::riscv_sf_vc_v_ivv: |
8555 | 0 | case Intrinsic::riscv_sf_vc_v_vvv: |
8556 | 0 | case Intrinsic::riscv_sf_vc_v_fvv: |
8557 | 0 | case Intrinsic::riscv_sf_vc_v_xvw: |
8558 | 0 | case Intrinsic::riscv_sf_vc_v_ivw: |
8559 | 0 | case Intrinsic::riscv_sf_vc_v_vvw: |
8560 | 0 | case Intrinsic::riscv_sf_vc_v_fvw: { |
8561 | 0 | MVT VT = Op.getSimpleValueType(); |
8562 | |
|
8563 | 0 | SmallVector<SDValue> Ops; |
8564 | 0 | getVCIXOperands(Op, DAG, Ops); |
8565 | |
|
8566 | 0 | MVT RetVT = VT; |
8567 | 0 | if (VT.isFixedLengthVector()) |
8568 | 0 | RetVT = getContainerForFixedLengthVector(VT); |
8569 | 0 | else if (VT.isFloatingPoint()) |
8570 | 0 | RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()), |
8571 | 0 | VT.getVectorElementCount()); |
8572 | |
|
8573 | 0 | SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops); |
8574 | |
|
8575 | 0 | if (VT.isFixedLengthVector()) |
8576 | 0 | NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget); |
8577 | 0 | else if (VT.isFloatingPoint()) |
8578 | 0 | NewNode = DAG.getBitcast(VT, NewNode); |
8579 | |
|
8580 | 0 | if (Op == NewNode) |
8581 | 0 | break; |
8582 | | |
8583 | 0 | return NewNode; |
8584 | 0 | } |
8585 | 0 | } |
8586 | | |
8587 | 0 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
8588 | 0 | } |
8589 | | |
8590 | | SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, |
8591 | 0 | SelectionDAG &DAG) const { |
8592 | 0 | unsigned IntNo = Op.getConstantOperandVal(1); |
8593 | 0 | switch (IntNo) { |
8594 | 0 | default: |
8595 | 0 | break; |
8596 | 0 | case Intrinsic::riscv_masked_strided_load: { |
8597 | 0 | SDLoc DL(Op); |
8598 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
8599 | | |
8600 | | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
8601 | | // the selection of the masked intrinsics doesn't do this for us. |
8602 | 0 | SDValue Mask = Op.getOperand(5); |
8603 | 0 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); |
8604 | |
|
8605 | 0 | MVT VT = Op->getSimpleValueType(0); |
8606 | 0 | MVT ContainerVT = VT; |
8607 | 0 | if (VT.isFixedLengthVector()) |
8608 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
8609 | |
|
8610 | 0 | SDValue PassThru = Op.getOperand(2); |
8611 | 0 | if (!IsUnmasked) { |
8612 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
8613 | 0 | if (VT.isFixedLengthVector()) { |
8614 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
8615 | 0 | PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); |
8616 | 0 | } |
8617 | 0 | } |
8618 | |
|
8619 | 0 | auto *Load = cast<MemIntrinsicSDNode>(Op); |
8620 | 0 | SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; |
8621 | 0 | SDValue Ptr = Op.getOperand(3); |
8622 | 0 | SDValue Stride = Op.getOperand(4); |
8623 | 0 | SDValue Result, Chain; |
8624 | | |
8625 | | // TODO: We currently restrict this to unmasked loads in consideration of
8626 | | // the complexity of handling all-false masks.
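 | | // (With a zero stride every element reads the same address, so a single
 | | // scalar extending load plus a splat is sufficient; otherwise we fall
 | | // through to the vlse/vlse_mask intrinsic below.)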
8627 | 0 | if (IsUnmasked && isNullConstant(Stride)) { |
8628 | 0 | MVT ScalarVT = ContainerVT.getVectorElementType(); |
8629 | 0 | SDValue ScalarLoad = |
8630 | 0 | DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr, |
8631 | 0 | ScalarVT, Load->getMemOperand()); |
8632 | 0 | Chain = ScalarLoad.getValue(1); |
8633 | 0 | Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG, |
8634 | 0 | Subtarget); |
8635 | 0 | } else { |
8636 | 0 | SDValue IntID = DAG.getTargetConstant( |
8637 | 0 | IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, |
8638 | 0 | XLenVT); |
8639 | |
|
8640 | 0 | SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; |
8641 | 0 | if (IsUnmasked) |
8642 | 0 | Ops.push_back(DAG.getUNDEF(ContainerVT)); |
8643 | 0 | else |
8644 | 0 | Ops.push_back(PassThru); |
8645 | 0 | Ops.push_back(Ptr); |
8646 | 0 | Ops.push_back(Stride); |
8647 | 0 | if (!IsUnmasked) |
8648 | 0 | Ops.push_back(Mask); |
8649 | 0 | Ops.push_back(VL); |
8650 | 0 | if (!IsUnmasked) { |
8651 | 0 | SDValue Policy = |
8652 | 0 | DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); |
8653 | 0 | Ops.push_back(Policy); |
8654 | 0 | } |
8655 | |
|
8656 | 0 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
8657 | 0 | Result = |
8658 | 0 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, |
8659 | 0 | Load->getMemoryVT(), Load->getMemOperand()); |
8660 | 0 | Chain = Result.getValue(1); |
8661 | 0 | } |
8662 | 0 | if (VT.isFixedLengthVector()) |
8663 | 0 | Result = convertFromScalableVector(VT, Result, DAG, Subtarget); |
8664 | 0 | return DAG.getMergeValues({Result, Chain}, DL); |
8665 | 0 | } |
8666 | 0 | case Intrinsic::riscv_seg2_load: |
8667 | 0 | case Intrinsic::riscv_seg3_load: |
8668 | 0 | case Intrinsic::riscv_seg4_load: |
8669 | 0 | case Intrinsic::riscv_seg5_load: |
8670 | 0 | case Intrinsic::riscv_seg6_load: |
8671 | 0 | case Intrinsic::riscv_seg7_load: |
8672 | 0 | case Intrinsic::riscv_seg8_load: { |
8673 | 0 | SDLoc DL(Op); |
8674 | 0 | static const Intrinsic::ID VlsegInts[7] = { |
8675 | 0 | Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, |
8676 | 0 | Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, |
8677 | 0 | Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, |
8678 | 0 | Intrinsic::riscv_vlseg8}; |
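 | | // The intrinsic returns NF vectors plus a chain, e.g. riscv_seg4_load has
 | | // five results, so NF below is getNumValues() - 1.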
8679 | 0 | unsigned NF = Op->getNumValues() - 1; |
8680 | 0 | assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); |
8681 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
8682 | 0 | MVT VT = Op->getSimpleValueType(0); |
8683 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
8684 | |
|
8685 | 0 | SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, |
8686 | 0 | Subtarget); |
8687 | 0 | SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); |
8688 | 0 | auto *Load = cast<MemIntrinsicSDNode>(Op); |
8689 | 0 | SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); |
8690 | 0 | ContainerVTs.push_back(MVT::Other); |
8691 | 0 | SDVTList VTs = DAG.getVTList(ContainerVTs); |
8692 | 0 | SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; |
8693 | 0 | Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT)); |
8694 | 0 | Ops.push_back(Op.getOperand(2)); |
8695 | 0 | Ops.push_back(VL); |
8696 | 0 | SDValue Result = |
8697 | 0 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, |
8698 | 0 | Load->getMemoryVT(), Load->getMemOperand()); |
8699 | 0 | SmallVector<SDValue, 9> Results; |
8700 | 0 | for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) |
8701 | 0 | Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx), |
8702 | 0 | DAG, Subtarget)); |
8703 | 0 | Results.push_back(Result.getValue(NF)); |
8704 | 0 | return DAG.getMergeValues(Results, DL); |
8705 | 0 | } |
8706 | 0 | case Intrinsic::riscv_sf_vc_v_x_se: |
8707 | 0 | case Intrinsic::riscv_sf_vc_v_i_se: |
8708 | 0 | case Intrinsic::riscv_sf_vc_v_xv_se: |
8709 | 0 | case Intrinsic::riscv_sf_vc_v_iv_se: |
8710 | 0 | case Intrinsic::riscv_sf_vc_v_vv_se: |
8711 | 0 | case Intrinsic::riscv_sf_vc_v_fv_se: |
8712 | 0 | case Intrinsic::riscv_sf_vc_v_xvv_se: |
8713 | 0 | case Intrinsic::riscv_sf_vc_v_ivv_se: |
8714 | 0 | case Intrinsic::riscv_sf_vc_v_vvv_se: |
8715 | 0 | case Intrinsic::riscv_sf_vc_v_fvv_se: |
8716 | 0 | case Intrinsic::riscv_sf_vc_v_xvw_se: |
8717 | 0 | case Intrinsic::riscv_sf_vc_v_ivw_se: |
8718 | 0 | case Intrinsic::riscv_sf_vc_v_vvw_se: |
8719 | 0 | case Intrinsic::riscv_sf_vc_v_fvw_se: { |
8720 | 0 | MVT VT = Op.getSimpleValueType(); |
8721 | 0 | SDLoc DL(Op); |
8722 | 0 | SmallVector<SDValue> Ops; |
8723 | 0 | getVCIXOperands(Op, DAG, Ops); |
8724 | |
|
8725 | 0 | MVT RetVT = VT; |
8726 | 0 | if (VT.isFixedLengthVector()) |
8727 | 0 | RetVT = getContainerForFixedLengthVector(VT); |
8728 | 0 | else if (VT.isFloatingPoint()) |
8729 | 0 | RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()), |
8730 | 0 | RetVT.getVectorElementCount()); |
8731 | |
|
8732 | 0 | SDVTList VTs = DAG.getVTList({RetVT, MVT::Other}); |
8733 | 0 | SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops); |
8734 | |
|
8735 | 0 | if (VT.isFixedLengthVector()) { |
8736 | 0 | SDValue FixedVector = |
8737 | 0 | convertFromScalableVector(VT, NewNode, DAG, Subtarget); |
8738 | 0 | NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL); |
8739 | 0 | } else if (VT.isFloatingPoint()) { |
8740 | 0 | SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0)); |
8741 | 0 | NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL); |
8742 | 0 | } |
8743 | |
|
8744 | 0 | if (Op == NewNode) |
8745 | 0 | break; |
8746 | | |
8747 | 0 | return NewNode; |
8748 | 0 | } |
8749 | 0 | } |
8750 | | |
8751 | 0 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
8752 | 0 | } |
8753 | | |
8754 | | SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, |
8755 | 0 | SelectionDAG &DAG) const { |
8756 | 0 | unsigned IntNo = Op.getConstantOperandVal(1); |
8757 | 0 | switch (IntNo) { |
8758 | 0 | default: |
8759 | 0 | break; |
8760 | 0 | case Intrinsic::riscv_masked_strided_store: { |
8761 | 0 | SDLoc DL(Op); |
8762 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
8763 | | |
8764 | | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
8765 | | // the selection of the masked intrinsics doesn't do this for us. |
8766 | 0 | SDValue Mask = Op.getOperand(5); |
8767 | 0 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); |
8768 | |
|
8769 | 0 | SDValue Val = Op.getOperand(2); |
8770 | 0 | MVT VT = Val.getSimpleValueType(); |
8771 | 0 | MVT ContainerVT = VT; |
8772 | 0 | if (VT.isFixedLengthVector()) { |
8773 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
8774 | 0 | Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); |
8775 | 0 | } |
8776 | 0 | if (!IsUnmasked) { |
8777 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
8778 | 0 | if (VT.isFixedLengthVector()) |
8779 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
8780 | 0 | } |
8781 | |
|
8782 | 0 | SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; |
8783 | |
|
8784 | 0 | SDValue IntID = DAG.getTargetConstant( |
8785 | 0 | IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, |
8786 | 0 | XLenVT); |
8787 | |
|
8788 | 0 | auto *Store = cast<MemIntrinsicSDNode>(Op); |
8789 | 0 | SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; |
8790 | 0 | Ops.push_back(Val); |
8791 | 0 | Ops.push_back(Op.getOperand(3)); // Ptr |
8792 | 0 | Ops.push_back(Op.getOperand(4)); // Stride |
8793 | 0 | if (!IsUnmasked) |
8794 | 0 | Ops.push_back(Mask); |
8795 | 0 | Ops.push_back(VL); |
8796 | |
|
8797 | 0 | return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), |
8798 | 0 | Ops, Store->getMemoryVT(), |
8799 | 0 | Store->getMemOperand()); |
8800 | 0 | } |
8801 | 0 | case Intrinsic::riscv_seg2_store: |
8802 | 0 | case Intrinsic::riscv_seg3_store: |
8803 | 0 | case Intrinsic::riscv_seg4_store: |
8804 | 0 | case Intrinsic::riscv_seg5_store: |
8805 | 0 | case Intrinsic::riscv_seg6_store: |
8806 | 0 | case Intrinsic::riscv_seg7_store: |
8807 | 0 | case Intrinsic::riscv_seg8_store: { |
8808 | 0 | SDLoc DL(Op); |
8809 | 0 | static const Intrinsic::ID VssegInts[] = { |
8810 | 0 | Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, |
8811 | 0 | Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, |
8812 | 0 | Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, |
8813 | 0 | Intrinsic::riscv_vsseg8}; |
8814 | | // Operands are (chain, int_id, vec*, ptr, vl) |
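 | | // e.g. riscv_seg3_store is (chain, int_id, v0, v1, v2, ptr, vl): 7 operands,
 | | // so NF below is 7 - 4 = 3.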
8815 | 0 | unsigned NF = Op->getNumOperands() - 4; |
8816 | 0 | assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); |
8817 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
8818 | 0 | MVT VT = Op->getOperand(2).getSimpleValueType(); |
8819 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
8820 | |
|
8821 | 0 | SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, |
8822 | 0 | Subtarget); |
8823 | 0 | SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT); |
8824 | 0 | SDValue Ptr = Op->getOperand(NF + 2); |
8825 | |
|
8826 | 0 | auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op); |
8827 | 0 | SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID}; |
8828 | 0 | for (unsigned i = 0; i < NF; i++) |
8829 | 0 | Ops.push_back(convertToScalableVector( |
8830 | 0 | ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget)); |
8831 | 0 | Ops.append({Ptr, VL}); |
8832 | |
|
8833 | 0 | return DAG.getMemIntrinsicNode( |
8834 | 0 | ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops, |
8835 | 0 | FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand()); |
8836 | 0 | } |
8837 | 0 | case Intrinsic::riscv_sf_vc_x_se_e8mf8: |
8838 | 0 | case Intrinsic::riscv_sf_vc_x_se_e8mf4: |
8839 | 0 | case Intrinsic::riscv_sf_vc_x_se_e8mf2: |
8840 | 0 | case Intrinsic::riscv_sf_vc_x_se_e8m1: |
8841 | 0 | case Intrinsic::riscv_sf_vc_x_se_e8m2: |
8842 | 0 | case Intrinsic::riscv_sf_vc_x_se_e8m4: |
8843 | 0 | case Intrinsic::riscv_sf_vc_x_se_e8m8: |
8844 | 0 | case Intrinsic::riscv_sf_vc_x_se_e16mf4: |
8845 | 0 | case Intrinsic::riscv_sf_vc_x_se_e16mf2: |
8846 | 0 | case Intrinsic::riscv_sf_vc_x_se_e16m1: |
8847 | 0 | case Intrinsic::riscv_sf_vc_x_se_e16m2: |
8848 | 0 | case Intrinsic::riscv_sf_vc_x_se_e16m4: |
8849 | 0 | case Intrinsic::riscv_sf_vc_x_se_e16m8: |
8850 | 0 | case Intrinsic::riscv_sf_vc_x_se_e32mf2: |
8851 | 0 | case Intrinsic::riscv_sf_vc_x_se_e32m1: |
8852 | 0 | case Intrinsic::riscv_sf_vc_x_se_e32m2: |
8853 | 0 | case Intrinsic::riscv_sf_vc_x_se_e32m4: |
8854 | 0 | case Intrinsic::riscv_sf_vc_x_se_e32m8: |
8855 | 0 | case Intrinsic::riscv_sf_vc_x_se_e64m1: |
8856 | 0 | case Intrinsic::riscv_sf_vc_x_se_e64m2: |
8857 | 0 | case Intrinsic::riscv_sf_vc_x_se_e64m4: |
8858 | 0 | case Intrinsic::riscv_sf_vc_x_se_e64m8: |
8859 | 0 | case Intrinsic::riscv_sf_vc_i_se_e8mf8: |
8860 | 0 | case Intrinsic::riscv_sf_vc_i_se_e8mf4: |
8861 | 0 | case Intrinsic::riscv_sf_vc_i_se_e8mf2: |
8862 | 0 | case Intrinsic::riscv_sf_vc_i_se_e8m1: |
8863 | 0 | case Intrinsic::riscv_sf_vc_i_se_e8m2: |
8864 | 0 | case Intrinsic::riscv_sf_vc_i_se_e8m4: |
8865 | 0 | case Intrinsic::riscv_sf_vc_i_se_e8m8: |
8866 | 0 | case Intrinsic::riscv_sf_vc_i_se_e16mf4: |
8867 | 0 | case Intrinsic::riscv_sf_vc_i_se_e16mf2: |
8868 | 0 | case Intrinsic::riscv_sf_vc_i_se_e16m1: |
8869 | 0 | case Intrinsic::riscv_sf_vc_i_se_e16m2: |
8870 | 0 | case Intrinsic::riscv_sf_vc_i_se_e16m4: |
8871 | 0 | case Intrinsic::riscv_sf_vc_i_se_e16m8: |
8872 | 0 | case Intrinsic::riscv_sf_vc_i_se_e32mf2: |
8873 | 0 | case Intrinsic::riscv_sf_vc_i_se_e32m1: |
8874 | 0 | case Intrinsic::riscv_sf_vc_i_se_e32m2: |
8875 | 0 | case Intrinsic::riscv_sf_vc_i_se_e32m4: |
8876 | 0 | case Intrinsic::riscv_sf_vc_i_se_e32m8: |
8877 | 0 | case Intrinsic::riscv_sf_vc_i_se_e64m1: |
8878 | 0 | case Intrinsic::riscv_sf_vc_i_se_e64m2: |
8879 | 0 | case Intrinsic::riscv_sf_vc_i_se_e64m4: |
8880 | 0 | case Intrinsic::riscv_sf_vc_i_se_e64m8: |
8881 | 0 | case Intrinsic::riscv_sf_vc_xv_se: |
8882 | 0 | case Intrinsic::riscv_sf_vc_iv_se: |
8883 | 0 | case Intrinsic::riscv_sf_vc_vv_se: |
8884 | 0 | case Intrinsic::riscv_sf_vc_fv_se: |
8885 | 0 | case Intrinsic::riscv_sf_vc_xvv_se: |
8886 | 0 | case Intrinsic::riscv_sf_vc_ivv_se: |
8887 | 0 | case Intrinsic::riscv_sf_vc_vvv_se: |
8888 | 0 | case Intrinsic::riscv_sf_vc_fvv_se: |
8889 | 0 | case Intrinsic::riscv_sf_vc_xvw_se: |
8890 | 0 | case Intrinsic::riscv_sf_vc_ivw_se: |
8891 | 0 | case Intrinsic::riscv_sf_vc_vvw_se: |
8892 | 0 | case Intrinsic::riscv_sf_vc_fvw_se: { |
8893 | 0 | SmallVector<SDValue> Ops; |
8894 | 0 | getVCIXOperands(Op, DAG, Ops); |
8895 | |
|
8896 | 0 | SDValue NewNode = |
8897 | 0 | DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops); |
8898 | |
|
8899 | 0 | if (Op == NewNode) |
8900 | 0 | break; |
8901 | | |
8902 | 0 | return NewNode; |
8903 | 0 | } |
8904 | 0 | } |
8905 | | |
8906 | 0 | return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); |
8907 | 0 | } |
8908 | | |
8909 | 0 | static unsigned getRVVReductionOp(unsigned ISDOpcode) { |
8910 | 0 | switch (ISDOpcode) { |
8911 | 0 | default: |
8912 | 0 | llvm_unreachable("Unhandled reduction"); |
8913 | 0 | case ISD::VP_REDUCE_ADD: |
8914 | 0 | case ISD::VECREDUCE_ADD: |
8915 | 0 | return RISCVISD::VECREDUCE_ADD_VL; |
8916 | 0 | case ISD::VP_REDUCE_UMAX: |
8917 | 0 | case ISD::VECREDUCE_UMAX: |
8918 | 0 | return RISCVISD::VECREDUCE_UMAX_VL; |
8919 | 0 | case ISD::VP_REDUCE_SMAX: |
8920 | 0 | case ISD::VECREDUCE_SMAX: |
8921 | 0 | return RISCVISD::VECREDUCE_SMAX_VL; |
8922 | 0 | case ISD::VP_REDUCE_UMIN: |
8923 | 0 | case ISD::VECREDUCE_UMIN: |
8924 | 0 | return RISCVISD::VECREDUCE_UMIN_VL; |
8925 | 0 | case ISD::VP_REDUCE_SMIN: |
8926 | 0 | case ISD::VECREDUCE_SMIN: |
8927 | 0 | return RISCVISD::VECREDUCE_SMIN_VL; |
8928 | 0 | case ISD::VP_REDUCE_AND: |
8929 | 0 | case ISD::VECREDUCE_AND: |
8930 | 0 | return RISCVISD::VECREDUCE_AND_VL; |
8931 | 0 | case ISD::VP_REDUCE_OR: |
8932 | 0 | case ISD::VECREDUCE_OR: |
8933 | 0 | return RISCVISD::VECREDUCE_OR_VL; |
8934 | 0 | case ISD::VP_REDUCE_XOR: |
8935 | 0 | case ISD::VECREDUCE_XOR: |
8936 | 0 | return RISCVISD::VECREDUCE_XOR_VL; |
8937 | 0 | case ISD::VP_REDUCE_FADD: |
8938 | 0 | return RISCVISD::VECREDUCE_FADD_VL; |
8939 | 0 | case ISD::VP_REDUCE_SEQ_FADD: |
8940 | 0 | return RISCVISD::VECREDUCE_SEQ_FADD_VL; |
8941 | 0 | case ISD::VP_REDUCE_FMAX: |
8942 | 0 | return RISCVISD::VECREDUCE_FMAX_VL; |
8943 | 0 | case ISD::VP_REDUCE_FMIN: |
8944 | 0 | return RISCVISD::VECREDUCE_FMIN_VL; |
8945 | 0 | } |
8946 | |
|
8947 | 0 | } |
8948 | | |
8949 | | SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, |
8950 | | SelectionDAG &DAG, |
8951 | 0 | bool IsVP) const { |
8952 | 0 | SDLoc DL(Op); |
8953 | 0 | SDValue Vec = Op.getOperand(IsVP ? 1 : 0); |
8954 | 0 | MVT VecVT = Vec.getSimpleValueType(); |
8955 | 0 | assert((Op.getOpcode() == ISD::VECREDUCE_AND || |
8956 | 0 | Op.getOpcode() == ISD::VECREDUCE_OR || |
8957 | 0 | Op.getOpcode() == ISD::VECREDUCE_XOR || |
8958 | 0 | Op.getOpcode() == ISD::VP_REDUCE_AND || |
8959 | 0 | Op.getOpcode() == ISD::VP_REDUCE_OR || |
8960 | 0 | Op.getOpcode() == ISD::VP_REDUCE_XOR) && |
8961 | 0 | "Unexpected reduction lowering"); |
8962 | | |
8963 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
8964 | |
|
8965 | 0 | MVT ContainerVT = VecVT; |
8966 | 0 | if (VecVT.isFixedLengthVector()) { |
8967 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
8968 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
8969 | 0 | } |
8970 | |
|
8971 | 0 | SDValue Mask, VL; |
8972 | 0 | if (IsVP) { |
8973 | 0 | Mask = Op.getOperand(2); |
8974 | 0 | VL = Op.getOperand(3); |
8975 | 0 | } else { |
8976 | 0 | std::tie(Mask, VL) = |
8977 | 0 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
8978 | 0 | } |
8979 | |
|
8980 | 0 | unsigned BaseOpc; |
8981 | 0 | ISD::CondCode CC; |
8982 | 0 | SDValue Zero = DAG.getConstant(0, DL, XLenVT); |
8983 | |
|
8984 | 0 | switch (Op.getOpcode()) { |
8985 | 0 | default: |
8986 | 0 | llvm_unreachable("Unhandled reduction"); |
8987 | 0 | case ISD::VECREDUCE_AND: |
8988 | 0 | case ISD::VP_REDUCE_AND: { |
8989 | | // vcpop ~x == 0 |
8990 | 0 | SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); |
8991 | 0 | Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); |
8992 | 0 | Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); |
8993 | 0 | CC = ISD::SETEQ; |
8994 | 0 | BaseOpc = ISD::AND; |
8995 | 0 | break; |
8996 | 0 | } |
8997 | 0 | case ISD::VECREDUCE_OR: |
8998 | 0 | case ISD::VP_REDUCE_OR: |
8999 | | // vcpop x != 0 |
9000 | 0 | Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); |
9001 | 0 | CC = ISD::SETNE; |
9002 | 0 | BaseOpc = ISD::OR; |
9003 | 0 | break; |
9004 | 0 | case ISD::VECREDUCE_XOR: |
9005 | 0 | case ISD::VP_REDUCE_XOR: { |
9006 | | // ((vcpop x) & 1) != 0 |
9007 | 0 | SDValue One = DAG.getConstant(1, DL, XLenVT); |
9008 | 0 | Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); |
9009 | 0 | Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One); |
9010 | 0 | CC = ISD::SETNE; |
9011 | 0 | BaseOpc = ISD::XOR; |
9012 | 0 | break; |
9013 | 0 | } |
9014 | 0 | } |
9015 | | |
9016 | 0 | SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC); |
9017 | 0 | SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC); |
9018 | |
|
9019 | 0 | if (!IsVP) |
9020 | 0 | return SetCC; |
9021 | | |
9022 | | // Now include the start value in the operation. |
9023 | | // Note that we must return the start value when no elements are operated |
9024 | | // upon. The vcpop instructions we've emitted in each case above will return |
9025 | | // 0 for an inactive vector, and so we've already received the neutral value: |
9026 | | // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we |
9027 | | // can simply include the start value. |
9028 | 0 | return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0)); |
9029 | 0 | } |
9030 | | |
9031 | 0 | static bool isNonZeroAVL(SDValue AVL) { |
9032 | 0 | auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL); |
9033 | 0 | auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL); |
9034 | 0 | return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || |
9035 | 0 | (ImmAVL && ImmAVL->getZExtValue() >= 1); |
9036 | 0 | } |
9037 | | |
9038 | | /// Helper to lower a reduction sequence of the form: |
9039 | | /// scalar = reduce_op vec, scalar_start |
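 | | /// Roughly: the start value is inserted into element 0 of an LMUL<=1 vector,
 | | /// the VL reduction node is emitted with that vector as its initial value,
 | | /// and element 0 of the result is extracted as the scalar.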
9040 | | static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, |
9041 | | SDValue StartValue, SDValue Vec, SDValue Mask, |
9042 | | SDValue VL, const SDLoc &DL, SelectionDAG &DAG, |
9043 | 0 | const RISCVSubtarget &Subtarget) { |
9044 | 0 | const MVT VecVT = Vec.getSimpleValueType(); |
9045 | 0 | const MVT M1VT = getLMUL1VT(VecVT); |
9046 | 0 | const MVT XLenVT = Subtarget.getXLenVT(); |
9047 | 0 | const bool NonZeroAVL = isNonZeroAVL(VL); |
9048 | | |
9049 | | // The reduction needs an LMUL1 input; do the splat at either LMUL1 |
9050 | | // or the original VT if fractional. |
9051 | 0 | auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT; |
9052 | | // We reuse the VL of the reduction to reduce vsetvli toggles if we can |
9053 | | // prove it is non-zero. For the AVL=0 case, we need the scalar to |
9054 | | // be the result of the reduction operation. |
9055 | 0 | auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT); |
9056 | 0 | SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, |
9057 | 0 | DAG, Subtarget); |
9058 | 0 | if (M1VT != InnerVT) |
9059 | 0 | InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, |
9060 | 0 | DAG.getUNDEF(M1VT), |
9061 | 0 | InitialValue, DAG.getConstant(0, DL, XLenVT)); |
9062 | 0 | SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue; |
9063 | 0 | SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); |
9064 | 0 | SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy}; |
9065 | 0 | SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops); |
9066 | 0 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, |
9067 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9068 | 0 | } |
9069 | | |
9070 | | SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, |
9071 | 0 | SelectionDAG &DAG) const { |
9072 | 0 | SDLoc DL(Op); |
9073 | 0 | SDValue Vec = Op.getOperand(0); |
9074 | 0 | EVT VecEVT = Vec.getValueType(); |
9075 | |
|
9076 | 0 | unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); |
9077 | | |
9078 | | // Due to ordering in legalize types we may have a vector type that needs to |
9079 | | // be split. Do that manually so we can get down to a legal type. |
9080 | 0 | while (getTypeAction(*DAG.getContext(), VecEVT) == |
9081 | 0 | TargetLowering::TypeSplitVector) { |
9082 | 0 | auto [Lo, Hi] = DAG.SplitVector(Vec, DL); |
9083 | 0 | VecEVT = Lo.getValueType(); |
9084 | 0 | Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); |
9085 | 0 | } |
9086 | | |
9087 | | // TODO: The type may need to be widened rather than split. Or widened before |
9088 | | // it can be split. |
9089 | 0 | if (!isTypeLegal(VecEVT)) |
9090 | 0 | return SDValue(); |
9091 | | |
9092 | 0 | MVT VecVT = VecEVT.getSimpleVT(); |
9093 | 0 | MVT VecEltVT = VecVT.getVectorElementType(); |
9094 | 0 | unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); |
9095 | |
|
9096 | 0 | MVT ContainerVT = VecVT; |
9097 | 0 | if (VecVT.isFixedLengthVector()) { |
9098 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
9099 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
9100 | 0 | } |
9101 | |
|
9102 | 0 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
9103 | |
|
9104 | 0 | SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); |
9105 | 0 | switch (BaseOpc) { |
9106 | 0 | case ISD::AND: |
9107 | 0 | case ISD::OR: |
9108 | 0 | case ISD::UMAX: |
9109 | 0 | case ISD::UMIN: |
9110 | 0 | case ISD::SMAX: |
9111 | 0 | case ISD::SMIN: |
9112 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9113 | 0 | StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec, |
9114 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9115 | 0 | } |
9116 | 0 | return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec, |
9117 | 0 | Mask, VL, DL, DAG, Subtarget); |
9118 | 0 | } |
9119 | | |
9120 | | // Given a reduction op, this function returns the matching reduction opcode, |
9121 | | // the vector SDValue and the scalar SDValue required to lower this to a |
9122 | | // RISCVISD node. |
9123 | | static std::tuple<unsigned, SDValue, SDValue> |
9124 | | getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, |
9125 | 0 | const RISCVSubtarget &Subtarget) { |
9126 | 0 | SDLoc DL(Op); |
9127 | 0 | auto Flags = Op->getFlags(); |
9128 | 0 | unsigned Opcode = Op.getOpcode(); |
9129 | 0 | switch (Opcode) { |
9130 | 0 | default: |
9131 | 0 | llvm_unreachable("Unhandled reduction"); |
9132 | 0 | case ISD::VECREDUCE_FADD: { |
9133 | | // Use positive zero if we can. It is cheaper to materialize. |
9134 | 0 | SDValue Zero = |
9135 | 0 | DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); |
9136 | 0 | return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); |
9137 | 0 | } |
9138 | 0 | case ISD::VECREDUCE_SEQ_FADD: |
9139 | 0 | return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), |
9140 | 0 | Op.getOperand(0)); |
9141 | 0 | case ISD::VECREDUCE_FMIN: |
9142 | 0 | case ISD::VECREDUCE_FMAX: { |
9143 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9144 | 0 | SDValue Front = |
9145 | 0 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0), |
9146 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9147 | 0 | unsigned RVVOpc = (Opcode == ISD::VECREDUCE_FMIN) |
9148 | 0 | ? RISCVISD::VECREDUCE_FMIN_VL |
9149 | 0 | : RISCVISD::VECREDUCE_FMAX_VL; |
9150 | 0 | return std::make_tuple(RVVOpc, Op.getOperand(0), Front); |
9151 | 0 | } |
9152 | 0 | } |
9153 | 0 | } |
9154 | | |
9155 | | SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, |
9156 | 0 | SelectionDAG &DAG) const { |
9157 | 0 | SDLoc DL(Op); |
9158 | 0 | MVT VecEltVT = Op.getSimpleValueType(); |
9159 | |
|
9160 | 0 | unsigned RVVOpcode; |
9161 | 0 | SDValue VectorVal, ScalarVal; |
9162 | 0 | std::tie(RVVOpcode, VectorVal, ScalarVal) = |
9163 | 0 | getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget); |
9164 | 0 | MVT VecVT = VectorVal.getSimpleValueType(); |
9165 | |
|
9166 | 0 | MVT ContainerVT = VecVT; |
9167 | 0 | if (VecVT.isFixedLengthVector()) { |
9168 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
9169 | 0 | VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); |
9170 | 0 | } |
9171 | |
|
9172 | 0 | auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); |
9173 | 0 | return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal, |
9174 | 0 | VectorVal, Mask, VL, DL, DAG, Subtarget); |
9175 | 0 | } |
9176 | | |
9177 | | SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, |
9178 | 0 | SelectionDAG &DAG) const { |
9179 | 0 | SDLoc DL(Op); |
9180 | 0 | SDValue Vec = Op.getOperand(1); |
9181 | 0 | EVT VecEVT = Vec.getValueType(); |
9182 | | |
9183 | | // TODO: The type may need to be widened rather than split. Or widened before |
9184 | | // it can be split. |
9185 | 0 | if (!isTypeLegal(VecEVT)) |
9186 | 0 | return SDValue(); |
9187 | | |
9188 | 0 | MVT VecVT = VecEVT.getSimpleVT(); |
9189 | 0 | unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); |
9190 | |
|
9191 | 0 | if (VecVT.isFixedLengthVector()) { |
9192 | 0 | auto ContainerVT = getContainerForFixedLengthVector(VecVT); |
9193 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
9194 | 0 | } |
9195 | |
|
9196 | 0 | SDValue VL = Op.getOperand(3); |
9197 | 0 | SDValue Mask = Op.getOperand(2); |
9198 | 0 | return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0), |
9199 | 0 | Vec, Mask, VL, DL, DAG, Subtarget); |
9200 | 0 | } |
9201 | | |
9202 | | SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, |
9203 | 0 | SelectionDAG &DAG) const { |
9204 | 0 | SDValue Vec = Op.getOperand(0); |
9205 | 0 | SDValue SubVec = Op.getOperand(1); |
9206 | 0 | MVT VecVT = Vec.getSimpleValueType(); |
9207 | 0 | MVT SubVecVT = SubVec.getSimpleValueType(); |
9208 | |
|
9209 | 0 | SDLoc DL(Op); |
9210 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9211 | 0 | unsigned OrigIdx = Op.getConstantOperandVal(2); |
9212 | 0 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
9213 | | |
9214 | | // We don't have the ability to slide mask vectors up indexed by their i1 |
9215 | | // elements; the smallest we can do is i8. Often we are able to bitcast to |
9216 | | // equivalent i8 vectors. Note that when inserting a fixed-length vector |
9217 | | // into a scalable one, we might not necessarily have enough scalable |
9218 | | // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. |
9219 | 0 | if (SubVecVT.getVectorElementType() == MVT::i1 && |
9220 | 0 | (OrigIdx != 0 || !Vec.isUndef())) { |
9221 | 0 | if (VecVT.getVectorMinNumElements() >= 8 && |
9222 | 0 | SubVecVT.getVectorMinNumElements() >= 8) { |
9223 | 0 | assert(OrigIdx % 8 == 0 && "Invalid index"); |
9224 | 0 | assert(VecVT.getVectorMinNumElements() % 8 == 0 && |
9225 | 0 | SubVecVT.getVectorMinNumElements() % 8 == 0 && |
9226 | 0 | "Unexpected mask vector lowering"); |
9227 | 0 | OrigIdx /= 8; |
9228 | 0 | SubVecVT = |
9229 | 0 | MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, |
9230 | 0 | SubVecVT.isScalableVector()); |
9231 | 0 | VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, |
9232 | 0 | VecVT.isScalableVector()); |
9233 | 0 | Vec = DAG.getBitcast(VecVT, Vec); |
9234 | 0 | SubVec = DAG.getBitcast(SubVecVT, SubVec); |
9235 | 0 | } else { |
9236 | | // We can't slide this mask vector up indexed by its i1 elements. |
9237 | | // This poses a problem when we wish to insert a scalable vector which |
9238 | | // can't be re-expressed as a larger type. Just choose the slow path and |
9239 | | // extend to a larger type, then truncate back down. |
9240 | 0 | MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); |
9241 | 0 | MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); |
9242 | 0 | Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); |
9243 | 0 | SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec); |
9244 | 0 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec, |
9245 | 0 | Op.getOperand(2)); |
9246 | 0 | SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT); |
9247 | 0 | return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE); |
9248 | 0 | } |
9249 | 0 | } |
9250 | | |
9251 | | // If the subvector is a fixed-length type, we cannot use subregister
9252 | | // manipulation to simplify the codegen; we don't know which register of a |
9253 | | // LMUL group contains the specific subvector as we only know the minimum |
9254 | | // register size. Therefore we must slide the vector group up the full |
9255 | | // amount. |
9256 | 0 | if (SubVecVT.isFixedLengthVector()) { |
9257 | 0 | if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) |
9258 | 0 | return Op; |
9259 | 0 | MVT ContainerVT = VecVT; |
9260 | 0 | if (VecVT.isFixedLengthVector()) { |
9261 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
9262 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
9263 | 0 | } |
9264 | |
|
9265 | 0 | if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { |
9266 | 0 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, |
9267 | 0 | DAG.getUNDEF(ContainerVT), SubVec, |
9268 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9269 | 0 | SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); |
9270 | 0 | return DAG.getBitcast(Op.getValueType(), SubVec); |
9271 | 0 | } |
9272 | | |
9273 | 0 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, |
9274 | 0 | DAG.getUNDEF(ContainerVT), SubVec, |
9275 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9276 | 0 | SDValue Mask = |
9277 | 0 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; |
9278 | | // Set the vector length to only the number of elements we care about. Note |
9279 | | // that for slideup this includes the offset. |
9280 | 0 | unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements(); |
9281 | 0 | SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget); |
9282 | | |
9283 | | // Use tail agnostic policy if we're inserting over Vec's tail. |
9284 | 0 | unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; |
9285 | 0 | if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements()) |
9286 | 0 | Policy = RISCVII::TAIL_AGNOSTIC; |
9287 | | |
9288 | | // If we're inserting into the lowest elements, use a tail undisturbed |
9289 | | // vmv.v.v. |
9290 | 0 | if (OrigIdx == 0) { |
9291 | 0 | SubVec = |
9292 | 0 | DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL); |
9293 | 0 | } else { |
9294 | 0 | SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); |
9295 | 0 | SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec, |
9296 | 0 | SlideupAmt, Mask, VL, Policy); |
9297 | 0 | } |
9298 | |
|
9299 | 0 | if (VecVT.isFixedLengthVector()) |
9300 | 0 | SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); |
9301 | 0 | return DAG.getBitcast(Op.getValueType(), SubVec); |
9302 | 0 | } |
9303 | | |
9304 | 0 | unsigned SubRegIdx, RemIdx; |
9305 | 0 | std::tie(SubRegIdx, RemIdx) = |
9306 | 0 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
9307 | 0 | VecVT, SubVecVT, OrigIdx, TRI); |
9308 | |
|
9309 | 0 | RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); |
9310 | 0 | bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || |
9311 | 0 | SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || |
9312 | 0 | SubVecLMUL == RISCVII::VLMUL::LMUL_F8; |
9313 | | |
9314 | | // 1. If the Idx has been completely eliminated and this subvector's size is |
9315 | | // a vector register or a multiple thereof, or the surrounding elements are |
9316 | | // undef, then this is a subvector insert which naturally aligns to a vector |
9317 | | // register. These can easily be handled using subregister manipulation. |
9318 | | // 2. If the subvector is smaller than a vector register, then the insertion |
9319 | | // must preserve the undisturbed elements of the register. We do this by |
9320 | | // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type |
9321 | | // (which resolves to a subregister copy), performing a VSLIDEUP to place the |
9322 | | // subvector within the vector register, and an INSERT_SUBVECTOR of that |
9323 | | // LMUL=1 type back into the larger vector (resolving to another subregister |
9324 | | // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type |
9325 | | // to avoid allocating a large register group to hold our subvector. |
9326 | 0 | if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) |
9327 | 0 | return Op; |
9328 | | |
9329 | | // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, elements
9330 | | // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy |
9331 | | // (in our case undisturbed). This means we can set up a subvector insertion |
9332 | | // where OFFSET is the insertion offset, and the VL is the OFFSET plus the |
9333 | | // size of the subvector. |
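 | | // For example (illustrative), inserting a 2-element subvector at index 3
 | | // uses OFFSET=3 and VL=5, leaving elements 0..2 untouched and, with the
 | | // undisturbed tail policy, elements from 5 upwards as well.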
9334 | 0 | MVT InterSubVT = VecVT; |
9335 | 0 | SDValue AlignedExtract = Vec; |
9336 | 0 | unsigned AlignedIdx = OrigIdx - RemIdx; |
9337 | 0 | if (VecVT.bitsGT(getLMUL1VT(VecVT))) { |
9338 | 0 | InterSubVT = getLMUL1VT(VecVT); |
9339 | | // Extract a subvector equal to the nearest full vector register type. This |
9340 | | // should resolve to a EXTRACT_SUBREG instruction. |
9341 | 0 | AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, |
9342 | 0 | DAG.getConstant(AlignedIdx, DL, XLenVT)); |
9343 | 0 | } |
9344 | |
|
9345 | 0 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, |
9346 | 0 | DAG.getUNDEF(InterSubVT), SubVec, |
9347 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9348 | |
|
9349 | 0 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); |
9350 | |
|
9351 | 0 | VL = computeVLMax(SubVecVT, DL, DAG); |
9352 | | |
9353 | | // If we're inserting into the lowest elements, use a tail undisturbed |
9354 | | // vmv.v.v. |
9355 | 0 | if (RemIdx == 0) { |
9356 | 0 | SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract, |
9357 | 0 | SubVec, VL); |
9358 | 0 | } else { |
9359 | 0 | SDValue SlideupAmt = |
9360 | 0 | DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); |
9361 | | |
9362 | | // Construct the vector length corresponding to RemIdx + length(SubVecVT). |
9363 | 0 | VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); |
9364 | |
|
9365 | 0 | SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec, |
9366 | 0 | SlideupAmt, Mask, VL); |
9367 | 0 | } |
9368 | | |
9369 | | // If required, insert this subvector back into the correct vector register. |
9370 | | // This should resolve to an INSERT_SUBREG instruction. |
9371 | 0 | if (VecVT.bitsGT(InterSubVT)) |
9372 | 0 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec, |
9373 | 0 | DAG.getConstant(AlignedIdx, DL, XLenVT)); |
9374 | | |
9375 | | // We might have bitcast from a mask type: cast back to the original type if |
9376 | | // required. |
9377 | 0 | return DAG.getBitcast(Op.getSimpleValueType(), SubVec); |
9378 | 0 | } |
9379 | | |
9380 | | SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, |
9381 | 0 | SelectionDAG &DAG) const { |
9382 | 0 | SDValue Vec = Op.getOperand(0); |
9383 | 0 | MVT SubVecVT = Op.getSimpleValueType(); |
9384 | 0 | MVT VecVT = Vec.getSimpleValueType(); |
9385 | |
|
9386 | 0 | SDLoc DL(Op); |
9387 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9388 | 0 | unsigned OrigIdx = Op.getConstantOperandVal(1); |
9389 | 0 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
9390 | | |
9391 | | // We don't have the ability to slide mask vectors down indexed by their i1 |
9392 | | // elements; the smallest we can do is i8. Often we are able to bitcast to |
9393 | | // equivalent i8 vectors. Note that when extracting a fixed-length vector |
9394 | | // from a scalable one, we might not necessarily have enough scalable |
9395 | | // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. |
9396 | 0 | if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { |
9397 | 0 | if (VecVT.getVectorMinNumElements() >= 8 && |
9398 | 0 | SubVecVT.getVectorMinNumElements() >= 8) { |
9399 | 0 | assert(OrigIdx % 8 == 0 && "Invalid index"); |
9400 | 0 | assert(VecVT.getVectorMinNumElements() % 8 == 0 && |
9401 | 0 | SubVecVT.getVectorMinNumElements() % 8 == 0 && |
9402 | 0 | "Unexpected mask vector lowering"); |
9403 | 0 | OrigIdx /= 8; |
9404 | 0 | SubVecVT = |
9405 | 0 | MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, |
9406 | 0 | SubVecVT.isScalableVector()); |
9407 | 0 | VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, |
9408 | 0 | VecVT.isScalableVector()); |
9409 | 0 | Vec = DAG.getBitcast(VecVT, Vec); |
9410 | 0 | } else { |
9411 | | // We can't slide this mask vector down, indexed by its i1 elements. |
9412 | | // This poses a problem when we wish to extract a scalable vector which |
9413 | | // can't be re-expressed as a larger type. Just choose the slow path and |
9414 | | // extend to a larger type, then truncate back down. |
9415 | | // TODO: We could probably improve this when extracting certain
9416 | | // fixed-length vectors from fixed-length vectors, where we can extract as
9417 | | // i8 and shift the correct element right to reach the desired subvector.
9418 | 0 | MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); |
9419 | 0 | MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); |
9420 | 0 | Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); |
9421 | 0 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec, |
9422 | 0 | Op.getOperand(1)); |
9423 | 0 | SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT); |
9424 | 0 | return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); |
9425 | 0 | } |
9426 | 0 | } |
9427 | | |
9428 | | // With an index of 0 this is a cast-like subvector, which can be performed |
9429 | | // with subregister operations. |
9430 | 0 | if (OrigIdx == 0) |
9431 | 0 | return Op; |
9432 | | |
9433 | | // If the subvector is a fixed-length type, we cannot use subregister
9434 | | // manipulation to simplify the codegen; we don't know which register of a |
9435 | | // LMUL group contains the specific subvector as we only know the minimum |
9436 | | // register size. Therefore we must slide the vector group down the full |
9437 | | // amount. |
9438 | 0 | if (SubVecVT.isFixedLengthVector()) { |
9439 | 0 | MVT ContainerVT = VecVT; |
9440 | 0 | if (VecVT.isFixedLengthVector()) { |
9441 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
9442 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
9443 | 0 | } |
9444 | | |
9445 | | // Shrink down Vec so we're performing the slidedown on a smaller LMUL. |
9446 | 0 | unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1; |
9447 | 0 | if (auto ShrunkVT = |
9448 | 0 | getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) { |
9449 | 0 | ContainerVT = *ShrunkVT; |
9450 | 0 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, |
9451 | 0 | DAG.getVectorIdxConstant(0, DL)); |
9452 | 0 | } |
9453 | |
|
9454 | 0 | SDValue Mask = |
9455 | 0 | getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; |
9456 | | // Set the vector length to only the number of elements we care about. This |
9457 | | // avoids sliding down elements we're going to discard straight away. |
9458 | 0 | SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG, |
9459 | 0 | Subtarget); |
9460 | 0 | SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); |
9461 | 0 | SDValue Slidedown = |
9462 | 0 | getVSlidedown(DAG, Subtarget, DL, ContainerVT, |
9463 | 0 | DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL); |
9464 | | // Now we can use a cast-like subvector extract to get the result. |
9465 | 0 | Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, |
9466 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9467 | 0 | return DAG.getBitcast(Op.getValueType(), Slidedown); |
9468 | 0 | } |
9469 | | |
9470 | 0 | unsigned SubRegIdx, RemIdx; |
9471 | 0 | std::tie(SubRegIdx, RemIdx) = |
9472 | 0 | RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( |
9473 | 0 | VecVT, SubVecVT, OrigIdx, TRI); |
9474 | | |
9475 | | // If the Idx has been completely eliminated then this is a subvector extract |
9476 | | // which naturally aligns to a vector register. These can easily be handled |
9477 | | // using subregister manipulation. |
9478 | 0 | if (RemIdx == 0) |
9479 | 0 | return Op; |
9480 | | |
9481 | | // Else SubVecVT is a fractional LMUL and may need to be slid down. |
9482 | 0 | assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second); |
9483 | | |
9484 | | // If the vector type is an LMUL-group type, extract a subvector equal to the |
9485 | | // nearest full vector register type. |
9486 | 0 | MVT InterSubVT = VecVT; |
9487 | 0 | if (VecVT.bitsGT(getLMUL1VT(VecVT))) { |
9488 | | // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and |
9489 | | // we should have successfully decomposed the extract into a subregister. |
9490 | 0 | assert(SubRegIdx != RISCV::NoSubRegister); |
9491 | 0 | InterSubVT = getLMUL1VT(VecVT); |
9492 | 0 | Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec); |
9493 | 0 | } |
9494 | | |
9495 | | // Slide this vector register down by the desired number of elements in order |
9496 | | // to place the desired subvector starting at element 0. |
9497 | 0 | SDValue SlidedownAmt = |
9498 | 0 | DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); |
9499 | |
|
9500 | 0 | auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); |
9501 | 0 | SDValue Slidedown = |
9502 | 0 | getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT), |
9503 | 0 | Vec, SlidedownAmt, Mask, VL); |
9504 | | |
9505 | | // Now the vector is in the right position, extract our final subvector. This |
9506 | | // should resolve to a COPY. |
9507 | 0 | Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, |
9508 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9509 | | |
9510 | | // We might have bitcast from a mask type: cast back to the original type if |
9511 | | // required. |
9512 | 0 | return DAG.getBitcast(Op.getSimpleValueType(), Slidedown); |
9513 | 0 | } |
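// [Editor's sketch -- not part of the upstream file.] The slidedown above moves
// element RemIdx of the LMUL1 register into lane 0, and the final
// EXTRACT_SUBVECTOR reads the leading lanes. A scalar model of that extract,
// assuming <vector>/<cstddef> and Idx + N <= Vec.size(); the helper name is
// hypothetical.
static std::vector<int> modelExtractSubvector(const std::vector<int> &Vec,
                                              size_t Idx, size_t N) {
  std::vector<int> Res(N);
  for (size_t I = 0; I < N; ++I)
    Res[I] = Vec[Idx + I]; // vslidedown.vx by Idx, then read the first N lanes.
  return Res;
}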
9514 | | |
9515 | | // Widen a node's vector operands to i8, then truncate its results back to
9516 | | // the original type, typically i1. All operand and result types must be the same.
9517 | | static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, |
9518 | 0 | SelectionDAG &DAG) { |
9519 | 0 | MVT VT = N.getSimpleValueType(); |
9520 | 0 | MVT WideVT = VT.changeVectorElementType(MVT::i8); |
9521 | 0 | SmallVector<SDValue, 4> WideOps; |
9522 | 0 | for (SDValue Op : N->ops()) { |
9523 | 0 | assert(Op.getSimpleValueType() == VT && |
9524 | 0 | "Operands and result must be same type"); |
9525 | 0 | WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op)); |
9526 | 0 | } |
9527 | |
|
9528 | 0 | unsigned NumVals = N->getNumValues(); |
9529 | |
|
9530 | 0 | SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>( |
9531 | 0 | NumVals, N.getValueType().changeVectorElementType(MVT::i8))); |
9532 | 0 | SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps); |
9533 | 0 | SmallVector<SDValue, 4> TruncVals; |
9534 | 0 | for (unsigned I = 0; I < NumVals; I++) { |
9535 | 0 | TruncVals.push_back( |
9536 | 0 | DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I), |
9537 | 0 | DAG.getConstant(0, DL, WideVT), ISD::SETNE)); |
9538 | 0 | } |
9539 | |
|
9540 | 0 | if (TruncVals.size() > 1) |
9541 | 0 | return DAG.getMergeValues(TruncVals, DL); |
9542 | 0 | return TruncVals.front(); |
9543 | 0 | } |
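// [Editor's sketch -- not part of the upstream file.] Per-lane view of the
// round trip performed by widenVectorOpsToi8: zero-extend i1 to i8, evaluate
// the operation at e8, then recover an i1 with a compare against zero. Assumes
// <cstdint>; Op stands in for any lane-wise operation and the helper name is
// hypothetical.
static bool modelWidenLaneToI8(bool A, bool B,
                               uint8_t (*Op)(uint8_t, uint8_t)) {
  uint8_t WideA = A ? 1 : 0; // ZERO_EXTEND
  uint8_t WideB = B ? 1 : 0;
  return Op(WideA, WideB) != 0; // SETNE against zero narrows back to i1.
}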
9544 | | |
9545 | | SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, |
9546 | 0 | SelectionDAG &DAG) const { |
9547 | 0 | SDLoc DL(Op); |
9548 | 0 | MVT VecVT = Op.getSimpleValueType(); |
9549 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9550 | |
|
9551 | 0 | assert(VecVT.isScalableVector() && |
9552 | 0 | "vector_interleave on non-scalable vector!"); |
9553 | | |
9554 | | // 1-bit element vectors need to be widened to e8.
9555 | 0 | if (VecVT.getVectorElementType() == MVT::i1) |
9556 | 0 | return widenVectorOpsToi8(Op, DL, DAG); |
9557 | | |
9558 | | // If the VT is LMUL=8, we need to split and reassemble. |
9559 | 0 | if (VecVT.getSizeInBits().getKnownMinValue() == |
9560 | 0 | (8 * RISCV::RVVBitsPerBlock)) { |
9561 | 0 | auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); |
9562 | 0 | auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); |
9563 | 0 | EVT SplitVT = Op0Lo.getValueType(); |
9564 | |
|
9565 | 0 | SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, |
9566 | 0 | DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi); |
9567 | 0 | SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, |
9568 | 0 | DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi); |
9569 | |
|
9570 | 0 | SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, |
9571 | 0 | ResLo.getValue(0), ResHi.getValue(0)); |
9572 | 0 | SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1), |
9573 | 0 | ResHi.getValue(1)); |
9574 | 0 | return DAG.getMergeValues({Even, Odd}, DL); |
9575 | 0 | } |
9576 | | |
9577 | | // Concatenate the two vectors as one vector to deinterleave |
9578 | 0 | MVT ConcatVT = |
9579 | 0 | MVT::getVectorVT(VecVT.getVectorElementType(), |
9580 | 0 | VecVT.getVectorElementCount().multiplyCoefficientBy(2)); |
9581 | 0 | SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, |
9582 | 0 | Op.getOperand(0), Op.getOperand(1)); |
9583 | | |
9584 | | // We want to operate on all lanes, so get the mask and VL for the whole concatenated vector.
9585 | 0 | auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget); |
9586 | 0 | SDValue Passthru = DAG.getUNDEF(ConcatVT); |
9587 | | |
9588 | | // We can deinterleave through vnsrl.wi if the element type is smaller than |
9589 | | // ELEN |
9590 | 0 | if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { |
9591 | 0 | SDValue Even = |
9592 | 0 | getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG); |
9593 | 0 | SDValue Odd = |
9594 | 0 | getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG); |
9595 | 0 | return DAG.getMergeValues({Even, Odd}, DL); |
9596 | 0 | } |
9597 | | |
9598 | | // For the indices, use the same SEW to avoid an extra vsetvli |
9599 | 0 | MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); |
9600 | | // Create a vector of even indices {0, 2, 4, ...} |
9601 | 0 | SDValue EvenIdx = |
9602 | 0 | DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2)); |
9603 | | // Create a vector of odd indices {1, 3, 5, ... } |
9604 | 0 | SDValue OddIdx = |
9605 | 0 | DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT)); |
9606 | | |
9607 | | // Gather the even and odd elements into two separate vectors |
9608 | 0 | SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, |
9609 | 0 | Concat, EvenIdx, Passthru, Mask, VL); |
9610 | 0 | SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT, |
9611 | 0 | Concat, OddIdx, Passthru, Mask, VL); |
9612 | | |
9613 | | // Extract the result half of the gather for even and odd |
9614 | 0 | SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide, |
9615 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9616 | 0 | SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide, |
9617 | 0 | DAG.getConstant(0, DL, XLenVT)); |
9618 | |
|
9619 | 0 | return DAG.getMergeValues({Even, Odd}, DL); |
9620 | 0 | } |
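// [Editor's sketch -- not part of the upstream file.] The gathers above pick
// even indices {0, 2, 4, ...} and odd indices {1, 3, 5, ...} out of the
// concatenated operands (the vnsrl path achieves the same by viewing each pair
// as one 2*SEW element and shifting by 0 or SEW). A scalar model, assuming
// <vector>/<utility>/<cstddef> and equally sized inputs; the helper name is
// hypothetical.
static std::pair<std::vector<int>, std::vector<int>>
modelDeinterleave(const std::vector<int> &V0, const std::vector<int> &V1) {
  std::vector<int> Concat(V0);
  Concat.insert(Concat.end(), V1.begin(), V1.end()); // CONCAT_VECTORS
  std::vector<int> Even, Odd;
  for (size_t I = 0; I + 1 < Concat.size(); I += 2) {
    Even.push_back(Concat[I]);    // gather with indices 0, 2, 4, ...
    Odd.push_back(Concat[I + 1]); // gather with indices 1, 3, 5, ...
  }
  return {Even, Odd};
}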
9621 | | |
9622 | | SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op, |
9623 | 0 | SelectionDAG &DAG) const { |
9624 | 0 | SDLoc DL(Op); |
9625 | 0 | MVT VecVT = Op.getSimpleValueType(); |
9626 | |
|
9627 | 0 | assert(VecVT.isScalableVector() && |
9628 | 0 | "vector_interleave on non-scalable vector!"); |
9629 | | |
9630 | | // i1 vectors need to be widened to i8 |
9631 | 0 | if (VecVT.getVectorElementType() == MVT::i1) |
9632 | 0 | return widenVectorOpsToi8(Op, DL, DAG); |
9633 | | |
9634 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9635 | 0 | SDValue VL = DAG.getRegister(RISCV::X0, XLenVT); |
9636 | | |
9637 | | // If the VT is LMUL=8, we need to split and reassemble. |
9638 | 0 | if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) { |
9639 | 0 | auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); |
9640 | 0 | auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1); |
9641 | 0 | EVT SplitVT = Op0Lo.getValueType(); |
9642 | |
|
9643 | 0 | SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, |
9644 | 0 | DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo); |
9645 | 0 | SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, |
9646 | 0 | DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi); |
9647 | |
|
9648 | 0 | SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, |
9649 | 0 | ResLo.getValue(0), ResLo.getValue(1)); |
9650 | 0 | SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, |
9651 | 0 | ResHi.getValue(0), ResHi.getValue(1)); |
9652 | 0 | return DAG.getMergeValues({Lo, Hi}, DL); |
9653 | 0 | } |
9654 | | |
9655 | 0 | SDValue Interleaved; |
9656 | | |
9657 | | // If the element type is smaller than ELEN, then we can interleave with |
9658 | | // vwaddu.vv and vwmaccu.vx |
9659 | 0 | if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) { |
9660 | 0 | Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL, |
9661 | 0 | DAG, Subtarget); |
9662 | 0 | } else { |
9663 | | // Otherwise, fall back to using vrgatherei16.vv
9664 | 0 | MVT ConcatVT = |
9665 | 0 | MVT::getVectorVT(VecVT.getVectorElementType(), |
9666 | 0 | VecVT.getVectorElementCount().multiplyCoefficientBy(2)); |
9667 | 0 | SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, |
9668 | 0 | Op.getOperand(0), Op.getOperand(1)); |
9669 | |
|
9670 | 0 | MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16); |
9671 | | |
9672 | | // 0 1 2 3 4 5 6 7 ... |
9673 | 0 | SDValue StepVec = DAG.getStepVector(DL, IdxVT); |
9674 | | |
9675 | | // 1 1 1 1 1 1 1 1 ... |
9676 | 0 | SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT)); |
9677 | | |
9678 | | // 1 0 1 0 1 0 1 0 ... |
9679 | 0 | SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones); |
9680 | 0 | OddMask = DAG.getSetCC( |
9681 | 0 | DL, IdxVT.changeVectorElementType(MVT::i1), OddMask, |
9682 | 0 | DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)), |
9683 | 0 | ISD::CondCode::SETNE); |
9684 | |
|
9685 | 0 | SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG)); |
9686 | | |
9687 | | // Build up the index vector for interleaving the concatenated vector |
9688 | | // 0 0 1 1 2 2 3 3 ... |
9689 | 0 | SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones); |
9690 | | // 0 n 1 n+1 2 n+2 3 n+3 ... |
9691 | 0 | Idx = |
9692 | 0 | DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL); |
9693 | | |
9694 | | // Then perform the interleave |
9695 | | // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ... |
9696 | 0 | SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG); |
9697 | 0 | Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT, |
9698 | 0 | Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL); |
9699 | 0 | } |
9700 | | |
9701 | | // Extract the two halves from the interleaved result |
9702 | 0 | SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved, |
9703 | 0 | DAG.getVectorIdxConstant(0, DL)); |
9704 | 0 | SDValue Hi = DAG.getNode( |
9705 | 0 | ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved, |
9706 | 0 | DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL)); |
9707 | |
|
9708 | 0 | return DAG.getMergeValues({Lo, Hi}, DL); |
9709 | 0 | } |
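// [Editor's sketch -- not part of the upstream file.] The vrgatherei16 path
// above reads the concatenation of the two operands with the index pattern
// 0, n, 1, n+1, 2, n+2, ..., built as (i >> 1) plus VLMAX on the odd lanes. A
// scalar model, assuming <vector>/<cstddef> and equally sized inputs; the
// helper name is hypothetical.
static std::vector<int> modelInterleave(const std::vector<int> &Lo,
                                        const std::vector<int> &Hi) {
  const size_t N = Lo.size();
  std::vector<int> Concat(Lo);
  Concat.insert(Concat.end(), Hi.begin(), Hi.end());
  std::vector<int> Res(2 * N);
  for (size_t I = 0; I < 2 * N; ++I)
    Res[I] = Concat[(I >> 1) + (I & 1) * N]; // Lo[0], Hi[0], Lo[1], Hi[1], ...
  return Res;
}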
9710 | | |
9711 | | // Lower step_vector to the vid instruction. Any non-identity step value must |
9712 | | // be accounted for by manual expansion.
9713 | | SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op, |
9714 | 0 | SelectionDAG &DAG) const { |
9715 | 0 | SDLoc DL(Op); |
9716 | 0 | MVT VT = Op.getSimpleValueType(); |
9717 | 0 | assert(VT.isScalableVector() && "Expected scalable vector"); |
9718 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9719 | 0 | auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); |
9720 | 0 | SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); |
9721 | 0 | uint64_t StepValImm = Op.getConstantOperandVal(0); |
9722 | 0 | if (StepValImm != 1) { |
9723 | 0 | if (isPowerOf2_64(StepValImm)) { |
9724 | 0 | SDValue StepVal = |
9725 | 0 | DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), |
9726 | 0 | DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL); |
9727 | 0 | StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal); |
9728 | 0 | } else { |
9729 | 0 | SDValue StepVal = lowerScalarSplat( |
9730 | 0 | SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), |
9731 | 0 | VL, VT, DL, DAG, Subtarget); |
9732 | 0 | StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal); |
9733 | 0 | } |
9734 | 0 | } |
9735 | 0 | return StepVec; |
9736 | 0 | } |
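// [Editor's sketch -- not part of the upstream file.] STEP_VECTOR is vid
// (0, 1, 2, ...) scaled by the step: a left shift when the step is a power of
// two, otherwise a multiply. A scalar model, assuming <vector>/<cstdint>/
// <cstddef>; the helper name is hypothetical.
static std::vector<uint64_t> modelStepVector(size_t NumElts, uint64_t Step) {
  std::vector<uint64_t> Res(NumElts);
  for (size_t I = 0; I < NumElts; ++I)
    Res[I] = I; // vid.v
  if (Step != 1) {
    bool IsPow2 = Step != 0 && (Step & (Step - 1)) == 0;
    unsigned ShAmt = 0;
    for (uint64_t S = Step; S > 1; S >>= 1)
      ++ShAmt; // Log2 of the step, used on the shift path.
    for (uint64_t &V : Res)
      V = IsPow2 ? (V << ShAmt) : (V * Step);
  }
  return Res;
}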
9737 | | |
9738 | | // Implement vector_reverse using vrgather.vv with indices determined by |
9739 | | // subtracting the id of each element from (VLMAX-1). This will convert |
9740 | | // the indices like so: |
9741 | | // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). |
9742 | | // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. |
9743 | | SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, |
9744 | 0 | SelectionDAG &DAG) const { |
9745 | 0 | SDLoc DL(Op); |
9746 | 0 | MVT VecVT = Op.getSimpleValueType(); |
9747 | 0 | if (VecVT.getVectorElementType() == MVT::i1) { |
9748 | 0 | MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); |
9749 | 0 | SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0)); |
9750 | 0 | SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1); |
9751 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2); |
9752 | 0 | } |
9753 | 0 | unsigned EltSize = VecVT.getScalarSizeInBits(); |
9754 | 0 | unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); |
9755 | 0 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
9756 | 0 | unsigned MaxVLMAX = |
9757 | 0 | RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); |
9758 | |
|
9759 | 0 | unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; |
9760 | 0 | MVT IntVT = VecVT.changeVectorElementTypeToInteger(); |
9761 | | |
9762 | | // If this is SEW=8 and VLMAX is potentially more than 256, we need |
9763 | | // to use vrgatherei16.vv. |
9764 | | // TODO: It's also possible to use vrgatherei16.vv for other types to |
9765 | | // decrease register width for the index calculation. |
9766 | 0 | if (MaxVLMAX > 256 && EltSize == 8) { |
9767 | | // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
9768 | | // Reverse each half, then reassemble them in reverse order. |
9769 | | // NOTE: It's also possible that after splitting that VLMAX no longer |
9770 | | // requires vrgatherei16.vv. |
9771 | 0 | if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { |
9772 | 0 | auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); |
9773 | 0 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT); |
9774 | 0 | Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); |
9775 | 0 | Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); |
9776 | | // Reassemble the low and high pieces reversed. |
9777 | | // FIXME: This is a CONCAT_VECTORS. |
9778 | 0 | SDValue Res = |
9779 | 0 | DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, |
9780 | 0 | DAG.getIntPtrConstant(0, DL)); |
9781 | 0 | return DAG.getNode( |
9782 | 0 | ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, |
9783 | 0 | DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL)); |
9784 | 0 | } |
9785 | | |
9786 | | // Just promote the int type to i16 which will double the LMUL. |
9787 | 0 | IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); |
9788 | 0 | GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; |
9789 | 0 | } |
9790 | | |
9791 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9792 | 0 | auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); |
9793 | | |
9794 | | // Calculate VLMAX-1 for the desired SEW. |
9795 | 0 | SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT, |
9796 | 0 | computeVLMax(VecVT, DL, DAG), |
9797 | 0 | DAG.getConstant(1, DL, XLenVT)); |
9798 | | |
9799 | | // Splat VLMAX-1 taking care to handle SEW==64 on RV32. |
9800 | 0 | bool IsRV32E64 = |
9801 | 0 | !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; |
9802 | 0 | SDValue SplatVL; |
9803 | 0 | if (!IsRV32E64) |
9804 | 0 | SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); |
9805 | 0 | else |
9806 | 0 | SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT), |
9807 | 0 | VLMinus1, DAG.getRegister(RISCV::X0, XLenVT)); |
9808 | |
|
9809 | 0 | SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); |
9810 | 0 | SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, |
9811 | 0 | DAG.getUNDEF(IntVT), Mask, VL); |
9812 | |
|
9813 | 0 | return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, |
9814 | 0 | DAG.getUNDEF(VecVT), Mask, VL); |
9815 | 0 | } |
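// [Editor's sketch -- not part of the upstream file.] The reverse above gathers
// with indices (VLMAX-1) - vid, so lane i reads element VLMAX-1-i. A scalar
// model, assuming <vector>/<cstddef>; the helper name is hypothetical.
static std::vector<int> modelVectorReverse(const std::vector<int> &Vec) {
  const size_t VLMax = Vec.size();
  std::vector<int> Res(VLMax);
  for (size_t I = 0; I < VLMax; ++I)
    Res[I] = Vec[(VLMax - 1) - I]; // vrgather.vv with Indices = (VLMAX-1) - VID
  return Res;
}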
9816 | | |
9817 | | SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, |
9818 | 0 | SelectionDAG &DAG) const { |
9819 | 0 | SDLoc DL(Op); |
9820 | 0 | SDValue V1 = Op.getOperand(0); |
9821 | 0 | SDValue V2 = Op.getOperand(1); |
9822 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9823 | 0 | MVT VecVT = Op.getSimpleValueType(); |
9824 | |
|
9825 | 0 | SDValue VLMax = computeVLMax(VecVT, DL, DAG); |
9826 | |
|
9827 | 0 | int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); |
9828 | 0 | SDValue DownOffset, UpOffset; |
9829 | 0 | if (ImmValue >= 0) { |
9830 | | // The operand is a TargetConstant, we need to rebuild it as a regular |
9831 | | // constant. |
9832 | 0 | DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); |
9833 | 0 | UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset); |
9834 | 0 | } else { |
9835 | | // The operand is a TargetConstant, we need to rebuild it as a regular |
9836 | | // constant rather than negating the original operand. |
9837 | 0 | UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); |
9838 | 0 | DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset); |
9839 | 0 | } |
9840 | |
|
9841 | 0 | SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG); |
9842 | |
|
9843 | 0 | SDValue SlideDown = |
9844 | 0 | getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, |
9845 | 0 | DownOffset, TrueMask, UpOffset); |
9846 | 0 | return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset, |
9847 | 0 | TrueMask, DAG.getRegister(RISCV::X0, XLenVT), |
9848 | 0 | RISCVII::TAIL_AGNOSTIC); |
9849 | 0 | } |
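// [Editor's sketch -- not part of the upstream file.] The slidedown/slideup
// pair above yields VLMAX contiguous elements of concat(V1, V2) starting at
// offset Imm (counted back from the end of V1 when Imm is negative). A scalar
// model, assuming <vector>/<cstdint>/<cstddef>, |Imm| <= VLMAX, and equally
// sized operands; the helper name is hypothetical.
static std::vector<int> modelSplice(const std::vector<int> &V1,
                                    const std::vector<int> &V2, int64_t Imm) {
  const size_t VLMax = V1.size();
  const size_t Start = Imm >= 0 ? size_t(Imm) : VLMax - size_t(-Imm);
  std::vector<int> Concat(V1); // V1 provides the slid-down head, V2 the tail.
  Concat.insert(Concat.end(), V2.begin(), V2.end());
  return std::vector<int>(Concat.begin() + Start,
                          Concat.begin() + Start + VLMax);
}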
9850 | | |
9851 | | SDValue |
9852 | | RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, |
9853 | 0 | SelectionDAG &DAG) const { |
9854 | 0 | SDLoc DL(Op); |
9855 | 0 | auto *Load = cast<LoadSDNode>(Op); |
9856 | |
|
9857 | 0 | assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
9858 | 0 | Load->getMemoryVT(), |
9859 | 0 | *Load->getMemOperand()) && |
9860 | 0 | "Expecting a correctly-aligned load"); |
9861 | | |
9862 | 0 | MVT VT = Op.getSimpleValueType(); |
9863 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9864 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
9865 | | |
9866 | | // If we know the exact VLEN and our fixed length vector completely fills |
9867 | | // the container, use a whole register load instead. |
9868 | 0 | const auto [MinVLMAX, MaxVLMAX] = |
9869 | 0 | RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); |
9870 | 0 | if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && |
9871 | 0 | getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) { |
9872 | 0 | SDValue NewLoad = |
9873 | 0 | DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), |
9874 | 0 | Load->getMemOperand()); |
9875 | 0 | SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); |
9876 | 0 | return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); |
9877 | 0 | } |
9878 | | |
9879 | 0 | SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget); |
9880 | |
|
9881 | 0 | bool IsMaskOp = VT.getVectorElementType() == MVT::i1; |
9882 | 0 | SDValue IntID = DAG.getTargetConstant( |
9883 | 0 | IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT); |
9884 | 0 | SmallVector<SDValue, 4> Ops{Load->getChain(), IntID}; |
9885 | 0 | if (!IsMaskOp) |
9886 | 0 | Ops.push_back(DAG.getUNDEF(ContainerVT)); |
9887 | 0 | Ops.push_back(Load->getBasePtr()); |
9888 | 0 | Ops.push_back(VL); |
9889 | 0 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
9890 | 0 | SDValue NewLoad = |
9891 | 0 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, |
9892 | 0 | Load->getMemoryVT(), Load->getMemOperand()); |
9893 | |
|
9894 | 0 | SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); |
9895 | 0 | return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL); |
9896 | 0 | } |
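// [Editor's sketch -- not part of the upstream file.] The whole-register fast
// path above fires when the exact VLEN is known and the fixed-length vector
// fills its container, i.e. VLMAX (= VLEN / SEW * LMUL) equals the element
// count. A sketch of that check, assuming the caller supplies the container's
// SEW and LMUL; the helper name is hypothetical.
static bool modelFillsContainer(unsigned VLenBits, unsigned SEW, unsigned LMul,
                                unsigned NumElts) {
  unsigned VLMax = VLenBits / SEW * LMul; // Exact because VLEN is exact.
  return VLMax == NumElts;                // MinVLMAX == MaxVLMAX == NumElts.
}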
9897 | | |
9898 | | SDValue |
9899 | | RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, |
9900 | 0 | SelectionDAG &DAG) const { |
9901 | 0 | SDLoc DL(Op); |
9902 | 0 | auto *Store = cast<StoreSDNode>(Op); |
9903 | |
|
9904 | 0 | assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
9905 | 0 | Store->getMemoryVT(), |
9906 | 0 | *Store->getMemOperand()) && |
9907 | 0 | "Expecting a correctly-aligned store"); |
9908 | | |
9909 | 0 | SDValue StoreVal = Store->getValue(); |
9910 | 0 | MVT VT = StoreVal.getSimpleValueType(); |
9911 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9912 | | |
9913 | | // If the size is less than a byte, we need to pad with zeros to make a byte.
9914 | 0 | if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { |
9915 | 0 | VT = MVT::v8i1; |
9916 | 0 | StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
9917 | 0 | DAG.getConstant(0, DL, VT), StoreVal, |
9918 | 0 | DAG.getIntPtrConstant(0, DL)); |
9919 | 0 | } |
9920 | |
|
9921 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
9922 | |
|
9923 | 0 | SDValue NewValue = |
9924 | 0 | convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); |
9925 | | |
9926 | | |
9927 | | // If we know the exact VLEN and our fixed length vector completely fills |
9928 | | // the container, use a whole register store instead. |
9929 | 0 | const auto [MinVLMAX, MaxVLMAX] = |
9930 | 0 | RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); |
9931 | 0 | if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() && |
9932 | 0 | getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) |
9933 | 0 | return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(), |
9934 | 0 | Store->getMemOperand()); |
9935 | | |
9936 | 0 | SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, |
9937 | 0 | Subtarget); |
9938 | |
|
9939 | 0 | bool IsMaskOp = VT.getVectorElementType() == MVT::i1; |
9940 | 0 | SDValue IntID = DAG.getTargetConstant( |
9941 | 0 | IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT); |
9942 | 0 | return DAG.getMemIntrinsicNode( |
9943 | 0 | ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), |
9944 | 0 | {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL}, |
9945 | 0 | Store->getMemoryVT(), Store->getMemOperand()); |
9946 | 0 | } |
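// [Editor's sketch -- not part of the upstream file.] vsm stores whole bytes,
// so a fixed-length mask with fewer than 8 elements is first inserted into a
// zeroed v8i1. A scalar model of that padding, assuming <vector>/<cstdint>/
// <cstddef> and the usual mask layout (element i in bit i); the helper name is
// hypothetical.
static uint8_t modelPackShortMask(const std::vector<bool> &Mask) {
  uint8_t Byte = 0; // INSERT_SUBVECTOR into a zero splat: padding bits stay 0.
  for (size_t I = 0; I < Mask.size() && I < 8; ++I)
    Byte |= uint8_t(Mask[I]) << I;
  return Byte;
}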
9947 | | |
9948 | | SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, |
9949 | 0 | SelectionDAG &DAG) const { |
9950 | 0 | SDLoc DL(Op); |
9951 | 0 | MVT VT = Op.getSimpleValueType(); |
9952 | |
|
9953 | 0 | const auto *MemSD = cast<MemSDNode>(Op); |
9954 | 0 | EVT MemVT = MemSD->getMemoryVT(); |
9955 | 0 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
9956 | 0 | SDValue Chain = MemSD->getChain(); |
9957 | 0 | SDValue BasePtr = MemSD->getBasePtr(); |
9958 | |
|
9959 | 0 | SDValue Mask, PassThru, VL; |
9960 | 0 | if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) { |
9961 | 0 | Mask = VPLoad->getMask(); |
9962 | 0 | PassThru = DAG.getUNDEF(VT); |
9963 | 0 | VL = VPLoad->getVectorLength(); |
9964 | 0 | } else { |
9965 | 0 | const auto *MLoad = cast<MaskedLoadSDNode>(Op); |
9966 | 0 | Mask = MLoad->getMask(); |
9967 | 0 | PassThru = MLoad->getPassThru(); |
9968 | 0 | } |
9969 | |
|
9970 | 0 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); |
9971 | |
|
9972 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
9973 | |
|
9974 | 0 | MVT ContainerVT = VT; |
9975 | 0 | if (VT.isFixedLengthVector()) { |
9976 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
9977 | 0 | PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); |
9978 | 0 | if (!IsUnmasked) { |
9979 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
9980 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
9981 | 0 | } |
9982 | 0 | } |
9983 | |
|
9984 | 0 | if (!VL) |
9985 | 0 | VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; |
9986 | |
|
9987 | 0 | unsigned IntID = |
9988 | 0 | IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; |
9989 | 0 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; |
9990 | 0 | if (IsUnmasked) |
9991 | 0 | Ops.push_back(DAG.getUNDEF(ContainerVT)); |
9992 | 0 | else |
9993 | 0 | Ops.push_back(PassThru); |
9994 | 0 | Ops.push_back(BasePtr); |
9995 | 0 | if (!IsUnmasked) |
9996 | 0 | Ops.push_back(Mask); |
9997 | 0 | Ops.push_back(VL); |
9998 | 0 | if (!IsUnmasked) |
9999 | 0 | Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); |
10000 | |
|
10001 | 0 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
10002 | |
|
10003 | 0 | SDValue Result = |
10004 | 0 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); |
10005 | 0 | Chain = Result.getValue(1); |
10006 | |
|
10007 | 0 | if (VT.isFixedLengthVector()) |
10008 | 0 | Result = convertFromScalableVector(VT, Result, DAG, Subtarget); |
10009 | |
|
10010 | 0 | return DAG.getMergeValues({Result, Chain}, DL); |
10011 | 0 | } |
10012 | | |
10013 | | SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, |
10014 | 0 | SelectionDAG &DAG) const { |
10015 | 0 | SDLoc DL(Op); |
10016 | |
|
10017 | 0 | const auto *MemSD = cast<MemSDNode>(Op); |
10018 | 0 | EVT MemVT = MemSD->getMemoryVT(); |
10019 | 0 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
10020 | 0 | SDValue Chain = MemSD->getChain(); |
10021 | 0 | SDValue BasePtr = MemSD->getBasePtr(); |
10022 | 0 | SDValue Val, Mask, VL; |
10023 | |
|
10024 | 0 | if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { |
10025 | 0 | Val = VPStore->getValue(); |
10026 | 0 | Mask = VPStore->getMask(); |
10027 | 0 | VL = VPStore->getVectorLength(); |
10028 | 0 | } else { |
10029 | 0 | const auto *MStore = cast<MaskedStoreSDNode>(Op); |
10030 | 0 | Val = MStore->getValue(); |
10031 | 0 | Mask = MStore->getMask(); |
10032 | 0 | } |
10033 | |
|
10034 | 0 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); |
10035 | |
|
10036 | 0 | MVT VT = Val.getSimpleValueType(); |
10037 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
10038 | |
|
10039 | 0 | MVT ContainerVT = VT; |
10040 | 0 | if (VT.isFixedLengthVector()) { |
10041 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10042 | |
|
10043 | 0 | Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); |
10044 | 0 | if (!IsUnmasked) { |
10045 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
10046 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
10047 | 0 | } |
10048 | 0 | } |
10049 | |
|
10050 | 0 | if (!VL) |
10051 | 0 | VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; |
10052 | |
|
10053 | 0 | unsigned IntID = |
10054 | 0 | IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; |
10055 | 0 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; |
10056 | 0 | Ops.push_back(Val); |
10057 | 0 | Ops.push_back(BasePtr); |
10058 | 0 | if (!IsUnmasked) |
10059 | 0 | Ops.push_back(Mask); |
10060 | 0 | Ops.push_back(VL); |
10061 | |
|
10062 | 0 | return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, |
10063 | 0 | DAG.getVTList(MVT::Other), Ops, MemVT, MMO); |
10064 | 0 | } |
10065 | | |
10066 | | SDValue |
10067 | | RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, |
10068 | 0 | SelectionDAG &DAG) const { |
10069 | 0 | MVT InVT = Op.getOperand(0).getSimpleValueType(); |
10070 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(InVT); |
10071 | |
|
10072 | 0 | MVT VT = Op.getSimpleValueType(); |
10073 | |
|
10074 | 0 | SDValue Op1 = |
10075 | 0 | convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); |
10076 | 0 | SDValue Op2 = |
10077 | 0 | convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); |
10078 | |
|
10079 | 0 | SDLoc DL(Op); |
10080 | 0 | auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL, |
10081 | 0 | DAG, Subtarget); |
10082 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
10083 | |
|
10084 | 0 | SDValue Cmp = |
10085 | 0 | DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, |
10086 | 0 | {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL}); |
10087 | |
|
10088 | 0 | return convertFromScalableVector(VT, Cmp, DAG, Subtarget); |
10089 | 0 | } |
10090 | | |
10091 | | SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op, |
10092 | 0 | SelectionDAG &DAG) const { |
10093 | 0 | unsigned Opc = Op.getOpcode(); |
10094 | 0 | SDLoc DL(Op); |
10095 | 0 | SDValue Chain = Op.getOperand(0); |
10096 | 0 | SDValue Op1 = Op.getOperand(1); |
10097 | 0 | SDValue Op2 = Op.getOperand(2); |
10098 | 0 | SDValue CC = Op.getOperand(3); |
10099 | 0 | ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); |
10100 | 0 | MVT VT = Op.getSimpleValueType(); |
10101 | 0 | MVT InVT = Op1.getSimpleValueType(); |
10102 | | |
10103 | | // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10104 | | // condition codes.
10105 | 0 | if (Opc == ISD::STRICT_FSETCCS) { |
10106 | | // Expand strict_fsetccs(x, y, oeq) to
10107 | | // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
10108 | 0 | SDVTList VTList = Op->getVTList(); |
10109 | 0 | if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) { |
10110 | 0 | SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE); |
10111 | 0 | SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1, |
10112 | 0 | Op2, OLECCVal); |
10113 | 0 | SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2, |
10114 | 0 | Op1, OLECCVal); |
10115 | 0 | SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, |
10116 | 0 | Tmp1.getValue(1), Tmp2.getValue(1)); |
10117 | | // Tmp1 and Tmp2 might be the same node. |
10118 | 0 | if (Tmp1 != Tmp2) |
10119 | 0 | Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2); |
10120 | 0 | return DAG.getMergeValues({Tmp1, OutChain}, DL); |
10121 | 0 | } |
10122 | | |
10123 | | // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq)) |
10124 | 0 | if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) { |
10125 | 0 | SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ); |
10126 | 0 | SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1, |
10127 | 0 | Op2, OEQCCVal); |
10128 | 0 | SDValue Res = DAG.getNOT(DL, OEQ, VT); |
10129 | 0 | return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL); |
10130 | 0 | } |
10131 | 0 | } |
10132 | | |
10133 | 0 | MVT ContainerInVT = InVT; |
10134 | 0 | if (InVT.isFixedLengthVector()) { |
10135 | 0 | ContainerInVT = getContainerForFixedLengthVector(InVT); |
10136 | 0 | Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget); |
10137 | 0 | Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget); |
10138 | 0 | } |
10139 | 0 | MVT MaskVT = getMaskTypeFor(ContainerInVT); |
10140 | |
|
10141 | 0 | auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget); |
10142 | |
|
10143 | 0 | SDValue Res; |
10144 | 0 | if (Opc == ISD::STRICT_FSETCC && |
10145 | 0 | (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE || |
10146 | 0 | CCVal == ISD::SETOLE)) { |
10147 | | // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10148 | | // is only active when both input elements are ordered.
10149 | 0 | SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG); |
10150 | 0 | SDValue OrderMask1 = DAG.getNode( |
10151 | 0 | RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other), |
10152 | 0 | {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT), |
10153 | 0 | True, VL}); |
10154 | 0 | SDValue OrderMask2 = DAG.getNode( |
10155 | 0 | RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other), |
10156 | 0 | {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT), |
10157 | 0 | True, VL}); |
10158 | 0 | Mask = |
10159 | 0 | DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL); |
10160 | | // Use Mask as the merge operand to let the result be 0 if either of the |
10161 | | // inputs is unordered. |
10162 | 0 | Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL, |
10163 | 0 | DAG.getVTList(MaskVT, MVT::Other), |
10164 | 0 | {Chain, Op1, Op2, CC, Mask, Mask, VL}); |
10165 | 0 | } else { |
10166 | 0 | unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL |
10167 | 0 | : RISCVISD::STRICT_FSETCCS_VL; |
10168 | 0 | Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other), |
10169 | 0 | {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL}); |
10170 | 0 | } |
10171 | |
|
10172 | 0 | if (VT.isFixedLengthVector()) { |
10173 | 0 | SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget); |
10174 | 0 | return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL); |
10175 | 0 | } |
10176 | 0 | return Res; |
10177 | 0 | } |
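// [Editor's sketch -- not part of the upstream file.] Scalar form of the
// STRICT_FSETCCS expansions above: oeq becomes (x ole y) and (y ole x), and
// une is the negation of oeq; both legs stay ordered compares, so a NaN
// operand still signals. Plain C++ relational operators model the ordered
// compares; the helper names are hypothetical.
static bool modelStrictOEQ(double X, double Y) {
  return (X <= Y) && (Y <= X); // Both compares are false (and signal) on NaN.
}
static bool modelStrictUNE(double X, double Y) {
  return !modelStrictOEQ(X, Y); // (not (strict_fsetccs x, y, oeq))
}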
10178 | | |
10179 | | // Lower vector ABS to smax(X, sub(0, X)). |
10180 | 0 | SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { |
10181 | 0 | SDLoc DL(Op); |
10182 | 0 | MVT VT = Op.getSimpleValueType(); |
10183 | 0 | SDValue X = Op.getOperand(0); |
10184 | |
|
10185 | 0 | assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) && |
10186 | 0 | "Unexpected type for ISD::ABS"); |
10187 | | |
10188 | 0 | MVT ContainerVT = VT; |
10189 | 0 | if (VT.isFixedLengthVector()) { |
10190 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10191 | 0 | X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); |
10192 | 0 | } |
10193 | |
|
10194 | 0 | SDValue Mask, VL; |
10195 | 0 | if (Op->getOpcode() == ISD::VP_ABS) { |
10196 | 0 | Mask = Op->getOperand(1); |
10197 | 0 | if (VT.isFixedLengthVector()) |
10198 | 0 | Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, |
10199 | 0 | Subtarget); |
10200 | 0 | VL = Op->getOperand(2); |
10201 | 0 | } else |
10202 | 0 | std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
10203 | |
|
10204 | 0 | SDValue SplatZero = DAG.getNode( |
10205 | 0 | RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), |
10206 | 0 | DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); |
10207 | 0 | SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, |
10208 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL); |
10209 | 0 | SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, |
10210 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL); |
10211 | |
|
10212 | 0 | if (VT.isFixedLengthVector()) |
10213 | 0 | Max = convertFromScalableVector(VT, Max, DAG, Subtarget); |
10214 | 0 | return Max; |
10215 | 0 | } |
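// [Editor's sketch -- not part of the upstream file.] Scalar form of the
// expansion above: abs(x) == smax(x, 0 - x). Assumes <cstdint>; the helper
// name is hypothetical. As with the vector form, INT32_MIN maps to itself.
static int32_t modelAbs(int32_t X) {
  int32_t NegX = int32_t(0u - uint32_t(X)); // sub(0, X) without signed overflow
  return X > NegX ? X : NegX;               // smax(X, NegX)
}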
10216 | | |
10217 | | SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( |
10218 | 0 | SDValue Op, SelectionDAG &DAG) const { |
10219 | 0 | SDLoc DL(Op); |
10220 | 0 | MVT VT = Op.getSimpleValueType(); |
10221 | 0 | SDValue Mag = Op.getOperand(0); |
10222 | 0 | SDValue Sign = Op.getOperand(1); |
10223 | 0 | assert(Mag.getValueType() == Sign.getValueType() && |
10224 | 0 | "Can only handle COPYSIGN with matching types."); |
10225 | | |
10226 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10227 | 0 | Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); |
10228 | 0 | Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); |
10229 | |
|
10230 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
10231 | |
|
10232 | 0 | SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, |
10233 | 0 | Sign, DAG.getUNDEF(ContainerVT), Mask, VL); |
10234 | |
|
10235 | 0 | return convertFromScalableVector(VT, CopySign, DAG, Subtarget); |
10236 | 0 | } |
10237 | | |
10238 | | SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( |
10239 | 0 | SDValue Op, SelectionDAG &DAG) const { |
10240 | 0 | MVT VT = Op.getSimpleValueType(); |
10241 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10242 | |
|
10243 | 0 | MVT I1ContainerVT = |
10244 | 0 | MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
10245 | |
|
10246 | 0 | SDValue CC = |
10247 | 0 | convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); |
10248 | 0 | SDValue Op1 = |
10249 | 0 | convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); |
10250 | 0 | SDValue Op2 = |
10251 | 0 | convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); |
10252 | |
|
10253 | 0 | SDLoc DL(Op); |
10254 | 0 | SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; |
10255 | |
|
10256 | 0 | SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1, |
10257 | 0 | Op2, DAG.getUNDEF(ContainerVT), VL); |
10258 | |
|
10259 | 0 | return convertFromScalableVector(VT, Select, DAG, Subtarget); |
10260 | 0 | } |
10261 | | |
10262 | | SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, |
10263 | 0 | SelectionDAG &DAG) const { |
10264 | 0 | unsigned NewOpc = getRISCVVLOp(Op); |
10265 | 0 | bool HasMergeOp = hasMergeOp(NewOpc); |
10266 | 0 | bool HasMask = hasMaskOp(NewOpc); |
10267 | |
|
10268 | 0 | MVT VT = Op.getSimpleValueType(); |
10269 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(VT); |
10270 | | |
10271 | | // Create list of operands by converting existing ones to scalable types. |
10272 | 0 | SmallVector<SDValue, 6> Ops; |
10273 | 0 | for (const SDValue &V : Op->op_values()) { |
10274 | 0 | assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); |
10275 | | |
10276 | | // Pass through non-vector operands. |
10277 | 0 | if (!V.getValueType().isVector()) { |
10278 | 0 | Ops.push_back(V); |
10279 | 0 | continue; |
10280 | 0 | } |
10281 | | |
10282 | | // "cast" fixed length vector to a scalable vector. |
10283 | 0 | assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && |
10284 | 0 | "Only fixed length vectors are supported!"); |
10285 | 0 | Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); |
10286 | 0 | } |
10287 | |
|
10288 | 0 | SDLoc DL(Op); |
10289 | 0 | auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); |
10290 | 0 | if (HasMergeOp) |
10291 | 0 | Ops.push_back(DAG.getUNDEF(ContainerVT)); |
10292 | 0 | if (HasMask) |
10293 | 0 | Ops.push_back(Mask); |
10294 | 0 | Ops.push_back(VL); |
10295 | | |
10296 | | // StrictFP operations have two result values. Their lowered result should
10297 | | // have the same result count.
10298 | 0 | if (Op->isStrictFPOpcode()) { |
10299 | 0 | SDValue ScalableRes = |
10300 | 0 | DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops, |
10301 | 0 | Op->getFlags()); |
10302 | 0 | SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); |
10303 | 0 | return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL); |
10304 | 0 | } |
10305 | | |
10306 | 0 | SDValue ScalableRes = |
10307 | 0 | DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags()); |
10308 | 0 | return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); |
10309 | 0 | } |
10310 | | |
10311 | | // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: |
10312 | | // * Operands of each node are assumed to be in the same order. |
10313 | | // * The EVL operand is promoted from i32 to i64 on RV64. |
10314 | | // * Fixed-length vectors are converted to their scalable-vector container |
10315 | | // types. |
10316 | 0 | SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { |
10317 | 0 | unsigned RISCVISDOpc = getRISCVVLOp(Op); |
10318 | 0 | bool HasMergeOp = hasMergeOp(RISCVISDOpc); |
10319 | |
|
10320 | 0 | SDLoc DL(Op); |
10321 | 0 | MVT VT = Op.getSimpleValueType(); |
10322 | 0 | SmallVector<SDValue, 4> Ops; |
10323 | |
|
10324 | 0 | MVT ContainerVT = VT; |
10325 | 0 | if (VT.isFixedLengthVector()) |
10326 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10327 | |
|
10328 | 0 | for (const auto &OpIdx : enumerate(Op->ops())) { |
10329 | 0 | SDValue V = OpIdx.value(); |
10330 | 0 | assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); |
10331 | | // Add a dummy merge value before the mask, or, if there isn't a mask, before
10332 | | // the EVL.
10333 | 0 | if (HasMergeOp) { |
10334 | 0 | auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode()); |
10335 | 0 | if (MaskIdx) { |
10336 | 0 | if (*MaskIdx == OpIdx.index()) |
10337 | 0 | Ops.push_back(DAG.getUNDEF(ContainerVT)); |
10338 | 0 | } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == |
10339 | 0 | OpIdx.index()) { |
10340 | 0 | if (Op.getOpcode() == ISD::VP_MERGE) { |
10341 | | // For VP_MERGE, copy the false operand instead of an undef value. |
10342 | 0 | Ops.push_back(Ops.back()); |
10343 | 0 | } else { |
10344 | 0 | assert(Op.getOpcode() == ISD::VP_SELECT); |
10345 | | // For VP_SELECT, add an undef value. |
10346 | 0 | Ops.push_back(DAG.getUNDEF(ContainerVT)); |
10347 | 0 | } |
10348 | 0 | } |
10349 | 0 | } |
10350 | | // Pass through operands which aren't fixed-length vectors. |
10351 | 0 | if (!V.getValueType().isFixedLengthVector()) { |
10352 | 0 | Ops.push_back(V); |
10353 | 0 | continue; |
10354 | 0 | } |
10355 | | // "cast" fixed length vector to a scalable vector. |
10356 | 0 | MVT OpVT = V.getSimpleValueType(); |
10357 | 0 | MVT ContainerVT = getContainerForFixedLengthVector(OpVT); |
10358 | 0 | assert(useRVVForFixedLengthVectorVT(OpVT) && |
10359 | 0 | "Only fixed length vectors are supported!"); |
10360 | 0 | Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); |
10361 | 0 | } |
10362 | |
|
10363 | 0 | if (!VT.isFixedLengthVector()) |
10364 | 0 | return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags()); |
10365 | | |
10366 | 0 | SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags()); |
10367 | |
|
10368 | 0 | return convertFromScalableVector(VT, VPOp, DAG, Subtarget); |
10369 | 0 | } |
10370 | | |
10371 | | SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, |
10372 | 0 | SelectionDAG &DAG) const { |
10373 | 0 | SDLoc DL(Op); |
10374 | 0 | MVT VT = Op.getSimpleValueType(); |
10375 | |
|
10376 | 0 | SDValue Src = Op.getOperand(0); |
10377 | | // NOTE: Mask is dropped. |
10378 | 0 | SDValue VL = Op.getOperand(2); |
10379 | |
|
10380 | 0 | MVT ContainerVT = VT; |
10381 | 0 | if (VT.isFixedLengthVector()) { |
10382 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10383 | 0 | MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); |
10384 | 0 | Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); |
10385 | 0 | } |
10386 | |
|
10387 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
10388 | 0 | SDValue Zero = DAG.getConstant(0, DL, XLenVT); |
10389 | 0 | SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
10390 | 0 | DAG.getUNDEF(ContainerVT), Zero, VL); |
10391 | |
|
10392 | 0 | SDValue SplatValue = DAG.getConstant( |
10393 | 0 | Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT); |
10394 | 0 | SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
10395 | 0 | DAG.getUNDEF(ContainerVT), SplatValue, VL); |
10396 | |
|
10397 | 0 | SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat, |
10398 | 0 | ZeroSplat, DAG.getUNDEF(ContainerVT), VL); |
10399 | 0 | if (!VT.isFixedLengthVector()) |
10400 | 0 | return Result; |
10401 | 0 | return convertFromScalableVector(VT, Result, DAG, Subtarget); |
10402 | 0 | } |
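// [Editor's sketch -- not part of the upstream file.] Per-lane view of the
// lowering above: vp.zext/vp.sext of a mask is a vmerge between a splat of 1
// (or -1 for sign extension) and a splat of 0, selected by the source mask.
// Assumes <cstdint>; the helper name is hypothetical.
static int64_t modelExtendMaskLane(bool MaskBit, bool IsZExt) {
  int64_t Splat = IsZExt ? 1 : -1; // VMV_V_X of 1 or -1
  return MaskBit ? Splat : 0;      // VMERGE with the source mask as selector
}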
10403 | | |
10404 | | SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, |
10405 | 0 | SelectionDAG &DAG) const { |
10406 | 0 | SDLoc DL(Op); |
10407 | 0 | MVT VT = Op.getSimpleValueType(); |
10408 | |
|
10409 | 0 | SDValue Op1 = Op.getOperand(0); |
10410 | 0 | SDValue Op2 = Op.getOperand(1); |
10411 | 0 | ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get(); |
10412 | | // NOTE: Mask is dropped. |
10413 | 0 | SDValue VL = Op.getOperand(4); |
10414 | |
|
10415 | 0 | MVT ContainerVT = VT; |
10416 | 0 | if (VT.isFixedLengthVector()) { |
10417 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10418 | 0 | Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); |
10419 | 0 | Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); |
10420 | 0 | } |
10421 | |
|
10422 | 0 | SDValue Result; |
10423 | 0 | SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); |
10424 | |
|
10425 | 0 | switch (Condition) { |
10426 | 0 | default: |
10427 | 0 | break; |
10428 | | // X != Y --> (X^Y) |
10429 | 0 | case ISD::SETNE: |
10430 | 0 | Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); |
10431 | 0 | break; |
10432 | | // X == Y --> ~(X^Y) |
10433 | 0 | case ISD::SETEQ: { |
10434 | 0 | SDValue Temp = |
10435 | 0 | DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); |
10436 | 0 | Result = |
10437 | 0 | DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL); |
10438 | 0 | break; |
10439 | 0 | } |
10440 | | // X >s Y --> X == 0 & Y == 1 --> ~X & Y |
10441 | | // X <u Y --> X == 0 & Y == 1 --> ~X & Y |
10442 | 0 | case ISD::SETGT: |
10443 | 0 | case ISD::SETULT: { |
10444 | 0 | SDValue Temp = |
10445 | 0 | DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); |
10446 | 0 | Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL); |
10447 | 0 | break; |
10448 | 0 | } |
10449 | | // X <s Y --> X == 1 & Y == 0 --> ~Y & X |
10450 | | // X >u Y --> X == 1 & Y == 0 --> ~Y & X |
10451 | 0 | case ISD::SETLT: |
10452 | 0 | case ISD::SETUGT: { |
10453 | 0 | SDValue Temp = |
10454 | 0 | DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); |
10455 | 0 | Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL); |
10456 | 0 | break; |
10457 | 0 | } |
10458 | | // X >=s Y --> X == 0 | Y == 1 --> ~X | Y |
10459 | | // X <=u Y --> X == 0 | Y == 1 --> ~X | Y |
10460 | 0 | case ISD::SETGE: |
10461 | 0 | case ISD::SETULE: { |
10462 | 0 | SDValue Temp = |
10463 | 0 | DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); |
10464 | 0 | Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL); |
10465 | 0 | break; |
10466 | 0 | } |
10467 | | // X <=s Y --> X == 1 | Y == 0 --> ~Y | X |
10468 | | // X >=u Y --> X == 1 | Y == 0 --> ~Y | X |
10469 | 0 | case ISD::SETLE: |
10470 | 0 | case ISD::SETUGE: { |
10471 | 0 | SDValue Temp = |
10472 | 0 | DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); |
10473 | 0 | Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL); |
10474 | 0 | break; |
10475 | 0 | } |
10476 | 0 | } |
10477 | | |
10478 | 0 | if (!VT.isFixedLengthVector()) |
10479 | 0 | return Result; |
10480 | 0 | return convertFromScalableVector(VT, Result, DAG, Subtarget); |
10481 | 0 | } |
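// [Editor's sketch -- not part of the upstream file.] The lowering above leans
// on i1 identities such as "X >s Y == ~X & Y" (a set i1 bit is -1 when
// signed). A tiny exhaustive check of that identity, assuming <cassert>; the
// helper name is hypothetical.
static void modelCheckMaskSetGT() {
  for (int XBit = 0; XBit <= 1; ++XBit)
    for (int YBit = 0; YBit <= 1; ++YBit) {
      int XS = XBit ? -1 : 0; // Signed value of the i1 bit.
      int YS = YBit ? -1 : 0;
      bool ViaAndNot = !XBit && YBit; // ~X & Y, as emitted above.
      assert((XS > YS) == ViaAndNot && "identity holds for all four cases");
    }
}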
10482 | | |
10483 | | // Lower Floating-Point/Integer Type-Convert VP SDNodes |
10484 | | SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, |
10485 | 0 | SelectionDAG &DAG) const { |
10486 | 0 | SDLoc DL(Op); |
10487 | |
|
10488 | 0 | SDValue Src = Op.getOperand(0); |
10489 | 0 | SDValue Mask = Op.getOperand(1); |
10490 | 0 | SDValue VL = Op.getOperand(2); |
10491 | 0 | unsigned RISCVISDOpc = getRISCVVLOp(Op); |
10492 | |
|
10493 | 0 | MVT DstVT = Op.getSimpleValueType(); |
10494 | 0 | MVT SrcVT = Src.getSimpleValueType(); |
10495 | 0 | if (DstVT.isFixedLengthVector()) { |
10496 | 0 | DstVT = getContainerForFixedLengthVector(DstVT); |
10497 | 0 | SrcVT = getContainerForFixedLengthVector(SrcVT); |
10498 | 0 | Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); |
10499 | 0 | MVT MaskVT = getMaskTypeFor(DstVT); |
10500 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
10501 | 0 | } |
10502 | |
|
10503 | 0 | unsigned DstEltSize = DstVT.getScalarSizeInBits(); |
10504 | 0 | unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); |
10505 | |
|
10506 | 0 | SDValue Result; |
10507 | 0 | if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. |
10508 | 0 | if (SrcVT.isInteger()) { |
10509 | 0 | assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); |
10510 | | |
10511 | 0 | unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL |
10512 | 0 | ? RISCVISD::VSEXT_VL |
10513 | 0 | : RISCVISD::VZEXT_VL; |
10514 | | |
10515 | | // Do we need to do any pre-widening before converting? |
10516 | 0 | if (SrcEltSize == 1) { |
10517 | 0 | MVT IntVT = DstVT.changeVectorElementTypeToInteger(); |
10518 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
10519 | 0 | SDValue Zero = DAG.getConstant(0, DL, XLenVT); |
10520 | 0 | SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, |
10521 | 0 | DAG.getUNDEF(IntVT), Zero, VL); |
10522 | 0 | SDValue One = DAG.getConstant( |
10523 | 0 | RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT); |
10524 | 0 | SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, |
10525 | 0 | DAG.getUNDEF(IntVT), One, VL); |
10526 | 0 | Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat, |
10527 | 0 | ZeroSplat, DAG.getUNDEF(IntVT), VL); |
10528 | 0 | } else if (DstEltSize > (2 * SrcEltSize)) { |
10529 | | // Widen before converting. |
10530 | 0 | MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2), |
10531 | 0 | DstVT.getVectorElementCount()); |
10532 | 0 | Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL); |
10533 | 0 | } |
10534 | |
|
10535 | 0 | Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); |
10536 | 0 | } else { |
10537 | 0 | assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && |
10538 | 0 | "Wrong input/output vector types"); |
10539 | | |
10540 | | // Convert f16 to f32 then convert f32 to i64. |
10541 | 0 | if (DstEltSize > (2 * SrcEltSize)) { |
10542 | 0 | assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); |
10543 | 0 | MVT InterimFVT = |
10544 | 0 | MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); |
10545 | 0 | Src = |
10546 | 0 | DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL); |
10547 | 0 | } |
10548 | | |
10549 | 0 | Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); |
10550 | 0 | } |
10551 | 0 | } else { // Narrowing + Conversion |
10552 | 0 | if (SrcVT.isInteger()) { |
10553 | 0 | assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); |
10554 | | // First do a narrowing conversion to an FP type half the size, then round
10555 | | // the FP type to a smaller FP type if needed.
10556 | | |
10557 | 0 | MVT InterimFVT = DstVT; |
10558 | 0 | if (SrcEltSize > (2 * DstEltSize)) { |
10559 | 0 | assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!"); |
10560 | 0 | assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); |
10561 | 0 | InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); |
10562 | 0 | } |
10563 | | |
10564 | 0 | Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL); |
10565 | |
|
10566 | 0 | if (InterimFVT != DstVT) { |
10567 | 0 | Src = Result; |
10568 | 0 | Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL); |
10569 | 0 | } |
10570 | 0 | } else { |
10571 | 0 | assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && |
10572 | 0 | "Wrong input/output vector types"); |
10573 | | // First do a narrowing conversion to an integer half the size, then |
10574 | | // truncate if needed. |
10575 | | |
10576 | 0 | if (DstEltSize == 1) { |
10577 | | // First convert to the same size integer, then convert to mask using |
10578 | | // setcc. |
10579 | 0 | assert(SrcEltSize >= 16 && "Unexpected FP type!"); |
10580 | 0 | MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize), |
10581 | 0 | DstVT.getVectorElementCount()); |
10582 | 0 | Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); |
10583 | | |
10584 | | // Compare the integer result to 0. The integer should be 0 or 1/-1, |
10585 | | // otherwise the conversion was undefined. |
10586 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
10587 | 0 | SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); |
10588 | 0 | SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT, |
10589 | 0 | DAG.getUNDEF(InterimIVT), SplatZero, VL); |
10590 | 0 | Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, |
10591 | 0 | {Result, SplatZero, DAG.getCondCode(ISD::SETNE), |
10592 | 0 | DAG.getUNDEF(DstVT), Mask, VL}); |
10593 | 0 | } else { |
10594 | 0 | MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), |
10595 | 0 | DstVT.getVectorElementCount()); |
10596 | |
|
10597 | 0 | Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); |
10598 | |
|
10599 | 0 | while (InterimIVT != DstVT) { |
10600 | 0 | SrcEltSize /= 2; |
10601 | 0 | Src = Result; |
10602 | 0 | InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), |
10603 | 0 | DstVT.getVectorElementCount()); |
10604 | 0 | Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT, |
10605 | 0 | Src, Mask, VL); |
10606 | 0 | } |
10607 | 0 | } |
10608 | 0 | } |
10609 | 0 | } |
10610 | | |
10611 | 0 | MVT VT = Op.getSimpleValueType(); |
10612 | 0 | if (!VT.isFixedLengthVector()) |
10613 | 0 | return Result; |
10614 | 0 | return convertFromScalableVector(VT, Result, DAG, Subtarget); |
10615 | 0 | } |
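A minimal scalar sketch of two of the element-wise steps above, assuming the illustrative helper names boolToFloat and doubleToI8 (they are not LLVM APIs): an i1 source is first widened by a vmerge of a zero splat and a +/-1 splat, and a narrowing FP-to-int conversion first converts to an integer of half the source element width and then truncates in halves until the destination width is reached.

#include <cstdint>

// Scalar model of widening an i1 element before the FP conversion: the
// VMERGE_VL above picks OneSplat (+1 for zext, -1 for sext) where the bit
// is set and ZeroSplat elsewhere, then the FP conversion runs as usual.
float boolToFloat(bool SrcBit, bool IsSignedExt) {
  int Widened = SrcBit ? (IsSignedExt ? -1 : 1) : 0;
  return static_cast<float>(Widened);
}

// Scalar model of the narrowing FP->int path: convert to an integer of
// SEW/2 first, then TRUNCATE_VECTOR_VL halves the width until DstVT.
int8_t doubleToI8(double Src) {
  int32_t Narrowed = static_cast<int32_t>(Src);  // fp64 -> i32
  int16_t Half = static_cast<int16_t>(Narrowed); // i32 -> i16 truncate
  return static_cast<int8_t>(Half);              // i16 -> i8 truncate
}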
10616 | | |
10617 | | SDValue |
10618 | | RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, |
10619 | 0 | SelectionDAG &DAG) const { |
10620 | 0 | SDLoc DL(Op); |
10621 | |
|
10622 | 0 | SDValue Op1 = Op.getOperand(0); |
10623 | 0 | SDValue Op2 = Op.getOperand(1); |
10624 | 0 | SDValue Offset = Op.getOperand(2); |
10625 | 0 | SDValue Mask = Op.getOperand(3); |
10626 | 0 | SDValue EVL1 = Op.getOperand(4); |
10627 | 0 | SDValue EVL2 = Op.getOperand(5); |
10628 | |
|
10629 | 0 | const MVT XLenVT = Subtarget.getXLenVT(); |
10630 | 0 | MVT VT = Op.getSimpleValueType(); |
10631 | 0 | MVT ContainerVT = VT; |
10632 | 0 | if (VT.isFixedLengthVector()) { |
10633 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10634 | 0 | Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); |
10635 | 0 | Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); |
10636 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
10637 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
10638 | 0 | } |
10639 | |
|
10640 | 0 | bool IsMaskVector = VT.getVectorElementType() == MVT::i1; |
10641 | 0 | if (IsMaskVector) { |
10642 | 0 | ContainerVT = ContainerVT.changeVectorElementType(MVT::i8); |
10643 | | |
10644 | | // Expand input operands |
10645 | 0 | SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
10646 | 0 | DAG.getUNDEF(ContainerVT), |
10647 | 0 | DAG.getConstant(1, DL, XLenVT), EVL1); |
10648 | 0 | SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
10649 | 0 | DAG.getUNDEF(ContainerVT), |
10650 | 0 | DAG.getConstant(0, DL, XLenVT), EVL1); |
10651 | 0 | Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1, |
10652 | 0 | SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1); |
10653 | |
|
10654 | 0 | SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
10655 | 0 | DAG.getUNDEF(ContainerVT), |
10656 | 0 | DAG.getConstant(1, DL, XLenVT), EVL2); |
10657 | 0 | SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
10658 | 0 | DAG.getUNDEF(ContainerVT), |
10659 | 0 | DAG.getConstant(0, DL, XLenVT), EVL2); |
10660 | 0 | Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2, |
10661 | 0 | SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2); |
10662 | 0 | } |
10663 | |
|
10664 | 0 | int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue(); |
10665 | 0 | SDValue DownOffset, UpOffset; |
10666 | 0 | if (ImmValue >= 0) { |
10667 | | // The operand is a TargetConstant, so we need to rebuild it as a regular |
10668 | | // constant. |
10669 | 0 | DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); |
10670 | 0 | UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset); |
10671 | 0 | } else { |
10672 | | // The operand is a TargetConstant, so we need to rebuild it as a regular |
10673 | | // constant rather than negating the original operand. |
10674 | 0 | UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); |
10675 | 0 | DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset); |
10676 | 0 | } |
10677 | |
|
10678 | 0 | SDValue SlideDown = |
10679 | 0 | getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), |
10680 | 0 | Op1, DownOffset, Mask, UpOffset); |
10681 | 0 | SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2, |
10682 | 0 | UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC); |
10683 | |
|
10684 | 0 | if (IsMaskVector) { |
10685 | | // Truncate Result back to a mask vector (Result has the same EVL as Op2). |
10686 | 0 | Result = DAG.getNode( |
10687 | 0 | RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1), |
10688 | 0 | {Result, DAG.getConstant(0, DL, ContainerVT), |
10689 | 0 | DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)), |
10690 | 0 | Mask, EVL2}); |
10691 | 0 | } |
10692 | |
|
10693 | 0 | if (!VT.isFixedLengthVector()) |
10694 | 0 | return Result; |
10695 | 0 | return convertFromScalableVector(VT, Result, DAG, Subtarget); |
10696 | 0 | } |
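A minimal sketch of the slide amounts computed above, as plain integer arithmetic (the struct and function names are illustrative only): a non-negative immediate slides Op1 down by Imm and Op2 up by EVL1 - Imm, while a negative immediate slides Op2 up by -Imm and Op1 down by EVL1 + Imm.

#include <cstdint>

struct SpliceOffsets { int64_t Down, Up; };

// Mirrors the DownOffset/UpOffset computation in the lowering above.
SpliceOffsets computeSpliceOffsets(int64_t Imm, int64_t EVL1) {
  if (Imm >= 0)
    return {Imm, EVL1 - Imm};
  return {EVL1 + Imm, -Imm};
}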
10697 | | |
10698 | | SDValue |
10699 | | RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, |
10700 | 0 | SelectionDAG &DAG) const { |
10701 | 0 | SDLoc DL(Op); |
10702 | 0 | MVT VT = Op.getSimpleValueType(); |
10703 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
10704 | |
|
10705 | 0 | SDValue Op1 = Op.getOperand(0); |
10706 | 0 | SDValue Mask = Op.getOperand(1); |
10707 | 0 | SDValue EVL = Op.getOperand(2); |
10708 | |
|
10709 | 0 | MVT ContainerVT = VT; |
10710 | 0 | if (VT.isFixedLengthVector()) { |
10711 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10712 | 0 | Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); |
10713 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
10714 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
10715 | 0 | } |
10716 | |
|
10717 | 0 | MVT GatherVT = ContainerVT; |
10718 | 0 | MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger(); |
10719 | | // Check if we are working with mask vectors |
10720 | 0 | bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1; |
10721 | 0 | if (IsMaskVector) { |
10722 | 0 | GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8); |
10723 | | |
10724 | | // Expand input operand |
10725 | 0 | SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, |
10726 | 0 | DAG.getUNDEF(IndicesVT), |
10727 | 0 | DAG.getConstant(1, DL, XLenVT), EVL); |
10728 | 0 | SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, |
10729 | 0 | DAG.getUNDEF(IndicesVT), |
10730 | 0 | DAG.getConstant(0, DL, XLenVT), EVL); |
10731 | 0 | Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne, |
10732 | 0 | SplatZero, DAG.getUNDEF(IndicesVT), EVL); |
10733 | 0 | } |
10734 | |
|
10735 | 0 | unsigned EltSize = GatherVT.getScalarSizeInBits(); |
10736 | 0 | unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue(); |
10737 | 0 | unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); |
10738 | 0 | unsigned MaxVLMAX = |
10739 | 0 | RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); |
10740 | |
|
10741 | 0 | unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; |
10742 | | // If this is SEW=8 and VLMAX is unknown or more than 256, we need |
10743 | | // to use vrgatherei16.vv. |
10744 | | // TODO: It's also possible to use vrgatherei16.vv for other types to |
10745 | | // decrease register width for the index calculation. |
10746 | | // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. |
10747 | 0 | if (MaxVLMAX > 256 && EltSize == 8) { |
10748 | | // If this is LMUL=8, we have to split before using vrgatherei16.vv. |
10749 | | // Split the vector in half and reverse each half using a full register |
10750 | | // reverse. |
10751 | | // Swap the halves and concatenate them. |
10752 | | // Slide the concatenated result by (VLMax - VL). |
10753 | 0 | if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { |
10754 | 0 | auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT); |
10755 | 0 | auto [Lo, Hi] = DAG.SplitVector(Op1, DL); |
10756 | |
|
10757 | 0 | SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); |
10758 | 0 | SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); |
10759 | | |
10760 | | // Reassemble the low and high pieces reversed. |
10761 | | // NOTE: This Result is unmasked (because we do not need masks for |
10762 | | // shuffles). If in the future this has to change, we can use a SELECT_VL |
10763 | | // between Result and UNDEF using the mask originally passed to VP_REVERSE. |
10764 | 0 | SDValue Result = |
10765 | 0 | DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev); |
10766 | | |
10767 | | // Slide off any elements from past EVL that were reversed into the low |
10768 | | // elements. |
10769 | 0 | unsigned MinElts = GatherVT.getVectorMinNumElements(); |
10770 | 0 | SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, |
10771 | 0 | DAG.getConstant(MinElts, DL, XLenVT)); |
10772 | 0 | SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL); |
10773 | |
|
10774 | 0 | Result = getVSlidedown(DAG, Subtarget, DL, GatherVT, |
10775 | 0 | DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL); |
10776 | |
|
10777 | 0 | if (IsMaskVector) { |
10778 | | // Truncate Result back to a mask vector |
10779 | 0 | Result = |
10780 | 0 | DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT, |
10781 | 0 | {Result, DAG.getConstant(0, DL, GatherVT), |
10782 | 0 | DAG.getCondCode(ISD::SETNE), |
10783 | 0 | DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); |
10784 | 0 | } |
10785 | |
|
10786 | 0 | if (!VT.isFixedLengthVector()) |
10787 | 0 | return Result; |
10788 | 0 | return convertFromScalableVector(VT, Result, DAG, Subtarget); |
10789 | 0 | } |
10790 | | |
10791 | | // Just promote the int type to i16 which will double the LMUL. |
10792 | 0 | IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount()); |
10793 | 0 | GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; |
10794 | 0 | } |
10795 | | |
10796 | 0 | SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL); |
10797 | 0 | SDValue VecLen = |
10798 | 0 | DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT)); |
10799 | 0 | SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, |
10800 | 0 | DAG.getUNDEF(IndicesVT), VecLen, EVL); |
10801 | 0 | SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID, |
10802 | 0 | DAG.getUNDEF(IndicesVT), Mask, EVL); |
10803 | 0 | SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB, |
10804 | 0 | DAG.getUNDEF(GatherVT), Mask, EVL); |
10805 | |
|
10806 | 0 | if (IsMaskVector) { |
10807 | | // Truncate Result back to a mask vector |
10808 | 0 | Result = DAG.getNode( |
10809 | 0 | RISCVISD::SETCC_VL, DL, ContainerVT, |
10810 | 0 | {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE), |
10811 | 0 | DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL}); |
10812 | 0 | } |
10813 | |
|
10814 | 0 | if (!VT.isFixedLengthVector()) |
10815 | 0 | return Result; |
10816 | 0 | return convertFromScalableVector(VT, Result, DAG, Subtarget); |
10817 | 0 | } |
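A hedged scalar model of the vid/vrsub/vrgather sequence in the common path above (vpReverse is an illustrative name, not the lowering API): element i of the result reads source element (EVL - 1) - i, which is exactly VecLenSplat - VID.

#include <vector>

std::vector<int> vpReverse(const std::vector<int> &Src, unsigned EVL) {
  std::vector<int> Result(EVL);
  for (unsigned I = 0; I < EVL; ++I)
    Result[I] = Src[(EVL - 1) - I]; // vrgather.vv / vrgatherei16.vv index
  return Result;
}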
10818 | | |
10819 | | SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, |
10820 | 0 | SelectionDAG &DAG) const { |
10821 | 0 | MVT VT = Op.getSimpleValueType(); |
10822 | 0 | if (VT.getVectorElementType() != MVT::i1) |
10823 | 0 | return lowerVPOp(Op, DAG); |
10824 | | |
10825 | | // It is safe to drop the mask parameter, as masked-off elements are undef. |
10826 | 0 | SDValue Op1 = Op->getOperand(0); |
10827 | 0 | SDValue Op2 = Op->getOperand(1); |
10828 | 0 | SDValue VL = Op->getOperand(3); |
10829 | |
|
10830 | 0 | MVT ContainerVT = VT; |
10831 | 0 | const bool IsFixed = VT.isFixedLengthVector(); |
10832 | 0 | if (IsFixed) { |
10833 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10834 | 0 | Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); |
10835 | 0 | Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); |
10836 | 0 | } |
10837 | |
|
10838 | 0 | SDLoc DL(Op); |
10839 | 0 | SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL); |
10840 | 0 | if (!IsFixed) |
10841 | 0 | return Val; |
10842 | 0 | return convertFromScalableVector(VT, Val, DAG, Subtarget); |
10843 | 0 | } |
10844 | | |
10845 | | SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op, |
10846 | 0 | SelectionDAG &DAG) const { |
10847 | 0 | SDLoc DL(Op); |
10848 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
10849 | 0 | MVT VT = Op.getSimpleValueType(); |
10850 | 0 | MVT ContainerVT = VT; |
10851 | 0 | if (VT.isFixedLengthVector()) |
10852 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10853 | |
|
10854 | 0 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
10855 | |
|
10856 | 0 | auto *VPNode = cast<VPStridedLoadSDNode>(Op); |
10857 | | // Check if the mask is known to be all ones |
10858 | 0 | SDValue Mask = VPNode->getMask(); |
10859 | 0 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); |
10860 | |
|
10861 | 0 | SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse |
10862 | 0 | : Intrinsic::riscv_vlse_mask, |
10863 | 0 | DL, XLenVT); |
10864 | 0 | SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, |
10865 | 0 | DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(), |
10866 | 0 | VPNode->getStride()}; |
10867 | 0 | if (!IsUnmasked) { |
10868 | 0 | if (VT.isFixedLengthVector()) { |
10869 | 0 | MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); |
10870 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
10871 | 0 | } |
10872 | 0 | Ops.push_back(Mask); |
10873 | 0 | } |
10874 | 0 | Ops.push_back(VPNode->getVectorLength()); |
10875 | 0 | if (!IsUnmasked) { |
10876 | 0 | SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); |
10877 | 0 | Ops.push_back(Policy); |
10878 | 0 | } |
10879 | |
|
10880 | 0 | SDValue Result = |
10881 | 0 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, |
10882 | 0 | VPNode->getMemoryVT(), VPNode->getMemOperand()); |
10883 | 0 | SDValue Chain = Result.getValue(1); |
10884 | |
|
10885 | 0 | if (VT.isFixedLengthVector()) |
10886 | 0 | Result = convertFromScalableVector(VT, Result, DAG, Subtarget); |
10887 | |
|
10888 | 0 | return DAG.getMergeValues({Result, Chain}, DL); |
10889 | 0 | } |
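A scalar sketch of what the vlse/vlse_mask operand list above asks for, assuming an i32 element type (stridedLoadI32 is an illustrative name): element i is read from Base plus i times the byte stride, and masked-off lanes are simply skipped here (the real instruction leaves them to the merge/tail policy).

#include <cstdint>
#include <vector>

std::vector<int32_t> stridedLoadI32(const int32_t *Base, int64_t StrideBytes,
                                    const std::vector<bool> &Mask,
                                    unsigned EVL) {
  std::vector<int32_t> Result(EVL, 0);
  const char *P = reinterpret_cast<const char *>(Base);
  for (unsigned I = 0; I < EVL; ++I)
    if (Mask.empty() || Mask[I]) // an empty Mask models the all-ones case
      Result[I] = *reinterpret_cast<const int32_t *>(P + I * StrideBytes);
  return Result;
}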
10890 | | |
10891 | | SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op, |
10892 | 0 | SelectionDAG &DAG) const { |
10893 | 0 | SDLoc DL(Op); |
10894 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
10895 | |
|
10896 | 0 | auto *VPNode = cast<VPStridedStoreSDNode>(Op); |
10897 | 0 | SDValue StoreVal = VPNode->getValue(); |
10898 | 0 | MVT VT = StoreVal.getSimpleValueType(); |
10899 | 0 | MVT ContainerVT = VT; |
10900 | 0 | if (VT.isFixedLengthVector()) { |
10901 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10902 | 0 | StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); |
10903 | 0 | } |
10904 | | |
10905 | | // Check if the mask is known to be all ones |
10906 | 0 | SDValue Mask = VPNode->getMask(); |
10907 | 0 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); |
10908 | |
|
10909 | 0 | SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse |
10910 | 0 | : Intrinsic::riscv_vsse_mask, |
10911 | 0 | DL, XLenVT); |
10912 | 0 | SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal, |
10913 | 0 | VPNode->getBasePtr(), VPNode->getStride()}; |
10914 | 0 | if (!IsUnmasked) { |
10915 | 0 | if (VT.isFixedLengthVector()) { |
10916 | 0 | MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); |
10917 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
10918 | 0 | } |
10919 | 0 | Ops.push_back(Mask); |
10920 | 0 | } |
10921 | 0 | Ops.push_back(VPNode->getVectorLength()); |
10922 | |
|
10923 | 0 | return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(), |
10924 | 0 | Ops, VPNode->getMemoryVT(), |
10925 | 0 | VPNode->getMemOperand()); |
10926 | 0 | } |
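The mirror-image sketch for the vsse/vsse_mask store path, under the same i32 assumption (stridedStoreI32 is illustrative): element i is written to Base plus i times the byte stride when its mask bit is set.

#include <cstdint>
#include <vector>

void stridedStoreI32(int32_t *Base, int64_t StrideBytes,
                     const std::vector<int32_t> &Val,
                     const std::vector<bool> &Mask, unsigned EVL) {
  char *P = reinterpret_cast<char *>(Base);
  for (unsigned I = 0; I < EVL; ++I)
    if (Mask.empty() || Mask[I])
      *reinterpret_cast<int32_t *>(P + I * StrideBytes) = Val[I];
}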
10927 | | |
10928 | | // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be |
10929 | | // matched to an RVV indexed load. The RVV indexed load instructions only |
10930 | | // support the "unsigned unscaled" addressing mode; indices are implicitly |
10931 | | // zero-extended or truncated to XLEN and are treated as byte offsets. Any |
10932 | | // signed or scaled indexing is extended to the XLEN value type and scaled |
10933 | | // accordingly. |
10934 | | SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, |
10935 | 0 | SelectionDAG &DAG) const { |
10936 | 0 | SDLoc DL(Op); |
10937 | 0 | MVT VT = Op.getSimpleValueType(); |
10938 | |
|
10939 | 0 | const auto *MemSD = cast<MemSDNode>(Op.getNode()); |
10940 | 0 | EVT MemVT = MemSD->getMemoryVT(); |
10941 | 0 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
10942 | 0 | SDValue Chain = MemSD->getChain(); |
10943 | 0 | SDValue BasePtr = MemSD->getBasePtr(); |
10944 | |
|
10945 | 0 | ISD::LoadExtType LoadExtType; |
10946 | 0 | SDValue Index, Mask, PassThru, VL; |
10947 | |
|
10948 | 0 | if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) { |
10949 | 0 | Index = VPGN->getIndex(); |
10950 | 0 | Mask = VPGN->getMask(); |
10951 | 0 | PassThru = DAG.getUNDEF(VT); |
10952 | 0 | VL = VPGN->getVectorLength(); |
10953 | | // VP doesn't support extending loads. |
10954 | 0 | LoadExtType = ISD::NON_EXTLOAD; |
10955 | 0 | } else { |
10956 | | // Else it must be a MGATHER. |
10957 | 0 | auto *MGN = cast<MaskedGatherSDNode>(Op.getNode()); |
10958 | 0 | Index = MGN->getIndex(); |
10959 | 0 | Mask = MGN->getMask(); |
10960 | 0 | PassThru = MGN->getPassThru(); |
10961 | 0 | LoadExtType = MGN->getExtensionType(); |
10962 | 0 | } |
10963 | |
|
10964 | 0 | MVT IndexVT = Index.getSimpleValueType(); |
10965 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
10966 | |
|
10967 | 0 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
10968 | 0 | "Unexpected VTs!"); |
10969 | 0 | assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); |
10970 | | // Targets have to explicitly opt in to extending vector loads. |
10971 | 0 | assert(LoadExtType == ISD::NON_EXTLOAD && |
10972 | 0 | "Unexpected extending MGATHER/VP_GATHER"); |
10973 | 0 | (void)LoadExtType; |
10974 | | |
10975 | | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
10976 | | // the selection of the masked intrinsics doesn't do this for us. |
10977 | 0 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); |
10978 | |
|
10979 | 0 | MVT ContainerVT = VT; |
10980 | 0 | if (VT.isFixedLengthVector()) { |
10981 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
10982 | 0 | IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), |
10983 | 0 | ContainerVT.getVectorElementCount()); |
10984 | |
|
10985 | 0 | Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); |
10986 | |
|
10987 | 0 | if (!IsUnmasked) { |
10988 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
10989 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
10990 | 0 | PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); |
10991 | 0 | } |
10992 | 0 | } |
10993 | |
|
10994 | 0 | if (!VL) |
10995 | 0 | VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; |
10996 | |
|
10997 | 0 | if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { |
10998 | 0 | IndexVT = IndexVT.changeVectorElementType(XLenVT); |
10999 | 0 | Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); |
11000 | 0 | } |
11001 | |
|
11002 | 0 | unsigned IntID = |
11003 | 0 | IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; |
11004 | 0 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; |
11005 | 0 | if (IsUnmasked) |
11006 | 0 | Ops.push_back(DAG.getUNDEF(ContainerVT)); |
11007 | 0 | else |
11008 | 0 | Ops.push_back(PassThru); |
11009 | 0 | Ops.push_back(BasePtr); |
11010 | 0 | Ops.push_back(Index); |
11011 | 0 | if (!IsUnmasked) |
11012 | 0 | Ops.push_back(Mask); |
11013 | 0 | Ops.push_back(VL); |
11014 | 0 | if (!IsUnmasked) |
11015 | 0 | Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); |
11016 | |
|
11017 | 0 | SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); |
11018 | 0 | SDValue Result = |
11019 | 0 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); |
11020 | 0 | Chain = Result.getValue(1); |
11021 | |
|
11022 | 0 | if (VT.isFixedLengthVector()) |
11023 | 0 | Result = convertFromScalableVector(VT, Result, DAG, Subtarget); |
11024 | |
|
11025 | 0 | return DAG.getMergeValues({Result, Chain}, DL); |
11026 | 0 | } |
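A hedged scalar model of the "unsigned unscaled" addressing described in the comment above, assuming an i32 element type (indexedGatherI32 is an illustrative name): each index is treated as an unsigned byte offset from the base pointer, and masked-off lanes keep the pass-through value.

#include <cstdint>
#include <vector>

std::vector<int32_t> indexedGatherI32(const char *Base,
                                      const std::vector<uint64_t> &ByteOffset,
                                      const std::vector<bool> &Mask,
                                      std::vector<int32_t> PassThru) {
  for (size_t I = 0; I < ByteOffset.size(); ++I)
    if (Mask.empty() || Mask[I]) // empty Mask models the all-ones case
      PassThru[I] = *reinterpret_cast<const int32_t *>(Base + ByteOffset[I]);
  return PassThru;
}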
11027 | | |
11028 | | // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be |
11029 | | // matched to an RVV indexed store. The RVV indexed store instructions only |
11030 | | // support the "unsigned unscaled" addressing mode; indices are implicitly |
11031 | | // zero-extended or truncated to XLEN and are treated as byte offsets. Any |
11032 | | // signed or scaled indexing is extended to the XLEN value type and scaled |
11033 | | // accordingly. |
11034 | | SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, |
11035 | 0 | SelectionDAG &DAG) const { |
11036 | 0 | SDLoc DL(Op); |
11037 | 0 | const auto *MemSD = cast<MemSDNode>(Op.getNode()); |
11038 | 0 | EVT MemVT = MemSD->getMemoryVT(); |
11039 | 0 | MachineMemOperand *MMO = MemSD->getMemOperand(); |
11040 | 0 | SDValue Chain = MemSD->getChain(); |
11041 | 0 | SDValue BasePtr = MemSD->getBasePtr(); |
11042 | |
|
11043 | 0 | bool IsTruncatingStore = false; |
11044 | 0 | SDValue Index, Mask, Val, VL; |
11045 | |
|
11046 | 0 | if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) { |
11047 | 0 | Index = VPSN->getIndex(); |
11048 | 0 | Mask = VPSN->getMask(); |
11049 | 0 | Val = VPSN->getValue(); |
11050 | 0 | VL = VPSN->getVectorLength(); |
11051 | | // VP doesn't support truncating stores. |
11052 | 0 | IsTruncatingStore = false; |
11053 | 0 | } else { |
11054 | | // Else it must be a MSCATTER. |
11055 | 0 | auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); |
11056 | 0 | Index = MSN->getIndex(); |
11057 | 0 | Mask = MSN->getMask(); |
11058 | 0 | Val = MSN->getValue(); |
11059 | 0 | IsTruncatingStore = MSN->isTruncatingStore(); |
11060 | 0 | } |
11061 | |
|
11062 | 0 | MVT VT = Val.getSimpleValueType(); |
11063 | 0 | MVT IndexVT = Index.getSimpleValueType(); |
11064 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
11065 | |
|
11066 | 0 | assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && |
11067 | 0 | "Unexpected VTs!"); |
11068 | 0 | assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); |
11069 | | // Targets have to explicitly opt in to extending vector loads and |
11070 | | // truncating vector stores. |
11071 | 0 | assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER"); |
11072 | 0 | (void)IsTruncatingStore; |
11073 | | |
11074 | | // If the mask is known to be all ones, optimize to an unmasked intrinsic; |
11075 | | // the selection of the masked intrinsics doesn't do this for us. |
11076 | 0 | bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); |
11077 | |
|
11078 | 0 | MVT ContainerVT = VT; |
11079 | 0 | if (VT.isFixedLengthVector()) { |
11080 | 0 | ContainerVT = getContainerForFixedLengthVector(VT); |
11081 | 0 | IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), |
11082 | 0 | ContainerVT.getVectorElementCount()); |
11083 | |
|
11084 | 0 | Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); |
11085 | 0 | Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); |
11086 | |
|
11087 | 0 | if (!IsUnmasked) { |
11088 | 0 | MVT MaskVT = getMaskTypeFor(ContainerVT); |
11089 | 0 | Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); |
11090 | 0 | } |
11091 | 0 | } |
11092 | |
|
11093 | 0 | if (!VL) |
11094 | 0 | VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; |
11095 | |
|
11096 | 0 | if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { |
11097 | 0 | IndexVT = IndexVT.changeVectorElementType(XLenVT); |
11098 | 0 | Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); |
11099 | 0 | } |
11100 | |
|
11101 | 0 | unsigned IntID = |
11102 | 0 | IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; |
11103 | 0 | SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; |
11104 | 0 | Ops.push_back(Val); |
11105 | 0 | Ops.push_back(BasePtr); |
11106 | 0 | Ops.push_back(Index); |
11107 | 0 | if (!IsUnmasked) |
11108 | 0 | Ops.push_back(Mask); |
11109 | 0 | Ops.push_back(VL); |
11110 | |
|
11111 | 0 | return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, |
11112 | 0 | DAG.getVTList(MVT::Other), Ops, MemVT, MMO); |
11113 | 0 | } |
11114 | | |
11115 | | SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, |
11116 | 0 | SelectionDAG &DAG) const { |
11117 | 0 | const MVT XLenVT = Subtarget.getXLenVT(); |
11118 | 0 | SDLoc DL(Op); |
11119 | 0 | SDValue Chain = Op->getOperand(0); |
11120 | 0 | SDValue SysRegNo = DAG.getTargetConstant( |
11121 | 0 | RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); |
11122 | 0 | SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other); |
11123 | 0 | SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo); |
11124 | | |
11125 | | // The encoding used for the rounding mode in RISC-V differs from that used |
11126 | | // in FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used |
11127 | | // as an index into a table, which consists of a sequence of 4-bit fields, |
11128 | | // each representing the corresponding FLT_ROUNDS mode. |
11129 | 0 | static const int Table = |
11130 | 0 | (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | |
11131 | 0 | (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | |
11132 | 0 | (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | |
11133 | 0 | (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | |
11134 | 0 | (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); |
11135 | |
|
11136 | 0 | SDValue Shift = |
11137 | 0 | DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT)); |
11138 | 0 | SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, |
11139 | 0 | DAG.getConstant(Table, DL, XLenVT), Shift); |
11140 | 0 | SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, |
11141 | 0 | DAG.getConstant(7, DL, XLenVT)); |
11142 | |
|
11143 | 0 | return DAG.getMergeValues({Masked, Chain}, DL); |
11144 | 0 | } |
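A worked sketch of the packed-table decode above (decodeRoundingTable is an illustrative name; the FRM and FLT_ROUNDS enumerators are as defined elsewhere in LLVM): each FRM value selects one 4-bit field of Table, and the low 3 bits of that field are the FLT_ROUNDS value, mirroring the SHL-by-2 / SRL / AND-7 sequence in the code.

#include <cstdint>

unsigned decodeRoundingTable(uint32_t Table, unsigned FRM) {
  unsigned Shift = FRM << 2;     // same as the SHL by 2 above: 4 * FRM
  return (Table >> Shift) & 0x7; // SRL then AND with 7 extracts one entry
}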
11145 | | |
11146 | | SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, |
11147 | 0 | SelectionDAG &DAG) const { |
11148 | 0 | const MVT XLenVT = Subtarget.getXLenVT(); |
11149 | 0 | SDLoc DL(Op); |
11150 | 0 | SDValue Chain = Op->getOperand(0); |
11151 | 0 | SDValue RMValue = Op->getOperand(1); |
11152 | 0 | SDValue SysRegNo = DAG.getTargetConstant( |
11153 | 0 | RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); |
11154 | | |
11155 | | // The encoding used for the rounding mode in RISC-V differs from that used |
11156 | | // in FLT_ROUNDS. To convert between them, the C rounding mode is used as an |
11157 | | // index into a table, which consists of a sequence of 4-bit fields, each |
11158 | | // representing the corresponding RISC-V mode. |
11159 | 0 | static const unsigned Table = |
11160 | 0 | (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) | |
11161 | 0 | (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) | |
11162 | 0 | (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) | |
11163 | 0 | (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) | |
11164 | 0 | (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway)); |
11165 | |
|
11166 | 0 | RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue); |
11167 | |
|
11168 | 0 | SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue, |
11169 | 0 | DAG.getConstant(2, DL, XLenVT)); |
11170 | 0 | SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, |
11171 | 0 | DAG.getConstant(Table, DL, XLenVT), Shift); |
11172 | 0 | RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, |
11173 | 0 | DAG.getConstant(0x7, DL, XLenVT)); |
11174 | 0 | return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo, |
11175 | 0 | RMValue); |
11176 | 0 | } |
11177 | | |
11178 | | SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
11179 | 0 | SelectionDAG &DAG) const { |
11180 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
11181 | |
|
11182 | 0 | bool isRISCV64 = Subtarget.is64Bit(); |
11183 | 0 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
11184 | |
|
11185 | 0 | int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false); |
11186 | 0 | return DAG.getFrameIndex(FI, PtrVT); |
11187 | 0 | } |
11188 | | |
11189 | | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
11190 | | // form of the given Opcode. |
11191 | 1.97k | static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { |
11192 | 1.97k | switch (Opcode) { |
11193 | 0 | default: |
11194 | 0 | llvm_unreachable("Unexpected opcode"); |
11195 | 589 | case ISD::SHL: |
11196 | 589 | return RISCVISD::SLLW; |
11197 | 550 | case ISD::SRA: |
11198 | 550 | return RISCVISD::SRAW; |
11199 | 836 | case ISD::SRL: |
11200 | 836 | return RISCVISD::SRLW; |
11201 | 0 | case ISD::SDIV: |
11202 | 0 | return RISCVISD::DIVW; |
11203 | 0 | case ISD::UDIV: |
11204 | 0 | return RISCVISD::DIVUW; |
11205 | 0 | case ISD::UREM: |
11206 | 0 | return RISCVISD::REMUW; |
11207 | 0 | case ISD::ROTL: |
11208 | 0 | return RISCVISD::ROLW; |
11209 | 0 | case ISD::ROTR: |
11210 | 0 | return RISCVISD::RORW; |
11211 | 1.97k | } |
11212 | 1.97k | } |
11213 | | |
11214 | | // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG |
11215 | | // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would |
11216 | | // otherwise be promoted to i64, making it difficult to select the |
11217 | | // SLLW/DIVUW/.../*W later on, because the fact that the operation was |
11218 | | // originally of type i8/i16/i32 is lost. |
11219 | | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, |
11220 | 1.97k | unsigned ExtOpc = ISD::ANY_EXTEND) { |
11221 | 1.97k | SDLoc DL(N); |
11222 | 1.97k | RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); |
11223 | 1.97k | SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); |
11224 | 1.97k | SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); |
11225 | 1.97k | SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); |
11226 | | // ReplaceNodeResults requires we maintain the same type for the return value. |
11227 | 1.97k | return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); |
11228 | 1.97k | } |
11229 | | |
11230 | | // Converts the given 32-bit operation to a i64 operation with signed extension |
11231 | | // semantic to reduce the signed extension instructions. |
11232 | 1.99k | static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { |
11233 | 1.99k | SDLoc DL(N); |
11234 | 1.99k | SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11235 | 1.99k | SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11236 | 1.99k | SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); |
11237 | 1.99k | SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, |
11238 | 1.99k | DAG.getValueType(MVT::i32)); |
11239 | 1.99k | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); |
11240 | 1.99k | } |
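A scalar sketch of customLegalizeToWOpWithSExt for an i32 add on RV64 (addWWithSExt is an illustrative name; two's-complement narrowing is assumed): the operands are any-extended so their upper bits are don't-care, the operation runs in 64 bits, and the SIGN_EXTEND_INREG keeps only a sign-extended copy of the low 32 bits, matching the *W instruction semantics.

#include <cstdint>

int64_t addWWithSExt(int64_t AnyExtLHS, int64_t AnyExtRHS) {
  int64_t Wide = AnyExtLHS + AnyExtRHS;  // the i64 operation
  return static_cast<int32_t>(Wide);     // sign-extend bits [31:0] back to i64
}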
11241 | | |
11242 | | void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, |
11243 | | SmallVectorImpl<SDValue> &Results, |
11244 | 19.3k | SelectionDAG &DAG) const { |
11245 | 19.3k | SDLoc DL(N); |
11246 | 19.3k | switch (N->getOpcode()) { |
11247 | 0 | default: |
11248 | 0 | llvm_unreachable("Don't know how to custom type legalize this operation!"); |
11249 | 0 | case ISD::STRICT_FP_TO_SINT: |
11250 | 0 | case ISD::STRICT_FP_TO_UINT: |
11251 | 0 | case ISD::FP_TO_SINT: |
11252 | 0 | case ISD::FP_TO_UINT: { |
11253 | 0 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11254 | 0 | "Unexpected custom legalisation"); |
11255 | 0 | bool IsStrict = N->isStrictFPOpcode(); |
11256 | 0 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || |
11257 | 0 | N->getOpcode() == ISD::STRICT_FP_TO_SINT; |
11258 | 0 | SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); |
11259 | 0 | if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != |
11260 | 0 | TargetLowering::TypeSoftenFloat) { |
11261 | 0 | if (!isTypeLegal(Op0.getValueType())) |
11262 | 0 | return; |
11263 | 0 | if (IsStrict) { |
11264 | 0 | SDValue Chain = N->getOperand(0); |
11265 | | // In the absence of Zfh, promote f16 to f32, then convert. |
11266 | 0 | if (Op0.getValueType() == MVT::f16 && |
11267 | 0 | !Subtarget.hasStdExtZfhOrZhinx()) { |
11268 | 0 | Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other}, |
11269 | 0 | {Chain, Op0}); |
11270 | 0 | Chain = Op0.getValue(1); |
11271 | 0 | } |
11272 | 0 | unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64 |
11273 | 0 | : RISCVISD::STRICT_FCVT_WU_RV64; |
11274 | 0 | SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); |
11275 | 0 | SDValue Res = DAG.getNode( |
11276 | 0 | Opc, DL, VTs, Chain, Op0, |
11277 | 0 | DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); |
11278 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11279 | 0 | Results.push_back(Res.getValue(1)); |
11280 | 0 | return; |
11281 | 0 | } |
11282 | | // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then |
11283 | | // convert. |
11284 | 0 | if ((Op0.getValueType() == MVT::f16 && |
11285 | 0 | !Subtarget.hasStdExtZfhOrZhinx()) || |
11286 | 0 | Op0.getValueType() == MVT::bf16) |
11287 | 0 | Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); |
11288 | |
|
11289 | 0 | unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
11290 | 0 | SDValue Res = |
11291 | 0 | DAG.getNode(Opc, DL, MVT::i64, Op0, |
11292 | 0 | DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); |
11293 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11294 | 0 | return; |
11295 | 0 | } |
11296 | | // If the FP type needs to be softened, emit a library call using the 'si' |
11297 | | // version. If we left it to default legalization, we'd end up with 'di'. If |
11298 | | // the FP type doesn't need to be softened, just let generic type |
11299 | | // legalization promote the result type. |
11300 | 0 | RTLIB::Libcall LC; |
11301 | 0 | if (IsSigned) |
11302 | 0 | LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); |
11303 | 0 | else |
11304 | 0 | LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); |
11305 | 0 | MakeLibCallOptions CallOptions; |
11306 | 0 | EVT OpVT = Op0.getValueType(); |
11307 | 0 | CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); |
11308 | 0 | SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); |
11309 | 0 | SDValue Result; |
11310 | 0 | std::tie(Result, Chain) = |
11311 | 0 | makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); |
11312 | 0 | Results.push_back(Result); |
11313 | 0 | if (IsStrict) |
11314 | 0 | Results.push_back(Chain); |
11315 | 0 | break; |
11316 | 0 | } |
11317 | 0 | case ISD::LROUND: { |
11318 | 0 | SDValue Op0 = N->getOperand(0); |
11319 | 0 | EVT Op0VT = Op0.getValueType(); |
11320 | 0 | if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != |
11321 | 0 | TargetLowering::TypeSoftenFloat) { |
11322 | 0 | if (!isTypeLegal(Op0VT)) |
11323 | 0 | return; |
11324 | | |
11325 | | // In the absence of Zfh, promote f16 to f32, then convert. |
11326 | 0 | if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) |
11327 | 0 | Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); |
11328 | |
|
11329 | 0 | SDValue Res = |
11330 | 0 | DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0, |
11331 | 0 | DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64)); |
11332 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11333 | 0 | return; |
11334 | 0 | } |
11335 | | // If the FP type needs to be softened, emit a library call to lround. We'll |
11336 | | // need to truncate the result. We assume any value that doesn't fit in i32 |
11337 | | // is allowed to return an unspecified value. |
11338 | 0 | RTLIB::Libcall LC = |
11339 | 0 | Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; |
11340 | 0 | MakeLibCallOptions CallOptions; |
11341 | 0 | EVT OpVT = Op0.getValueType(); |
11342 | 0 | CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true); |
11343 | 0 | SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first; |
11344 | 0 | Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result); |
11345 | 0 | Results.push_back(Result); |
11346 | 0 | break; |
11347 | 0 | } |
11348 | 0 | case ISD::READCYCLECOUNTER: { |
11349 | 0 | assert(!Subtarget.is64Bit() && |
11350 | 0 | "READCYCLECOUNTER only has custom type legalization on riscv32"); |
11351 | | |
11352 | 0 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); |
11353 | 0 | SDValue RCW = |
11354 | 0 | DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); |
11355 | |
|
11356 | 0 | Results.push_back( |
11357 | 0 | DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); |
11358 | 0 | Results.push_back(RCW.getValue(2)); |
11359 | 0 | break; |
11360 | 0 | } |
11361 | 14.4k | case ISD::LOAD: { |
11362 | 14.4k | if (!ISD::isNON_EXTLoad(N)) |
11363 | 14 | return; |
11364 | | |
11365 | | // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the |
11366 | | // sext_inreg we emit for ADD/SUB/MUL/SLLI. |
11367 | 14.4k | LoadSDNode *Ld = cast<LoadSDNode>(N); |
11368 | | |
11369 | 14.4k | SDLoc dl(N); |
11370 | 14.4k | SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(), |
11371 | 14.4k | Ld->getBasePtr(), Ld->getMemoryVT(), |
11372 | 14.4k | Ld->getMemOperand()); |
11373 | 14.4k | Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res)); |
11374 | 14.4k | Results.push_back(Res.getValue(1)); |
11375 | 14.4k | return; |
11376 | 14.4k | } |
11377 | 0 | case ISD::MUL: { |
11378 | 0 | unsigned Size = N->getSimpleValueType(0).getSizeInBits(); |
11379 | 0 | unsigned XLen = Subtarget.getXLen(); |
11380 | | // This multiply needs to be expanded, try to use MULHSU+MUL if possible. |
11381 | 0 | if (Size > XLen) { |
11382 | 0 | assert(Size == (XLen * 2) && "Unexpected custom legalisation"); |
11383 | 0 | SDValue LHS = N->getOperand(0); |
11384 | 0 | SDValue RHS = N->getOperand(1); |
11385 | 0 | APInt HighMask = APInt::getHighBitsSet(Size, XLen); |
11386 | |
|
11387 | 0 | bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask); |
11388 | 0 | bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask); |
11389 | | // We need exactly one side to be unsigned. |
11390 | 0 | if (LHSIsU == RHSIsU) |
11391 | 0 | return; |
11392 | | |
11393 | 0 | auto MakeMULPair = [&](SDValue S, SDValue U) { |
11394 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
11395 | 0 | S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S); |
11396 | 0 | U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U); |
11397 | 0 | SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U); |
11398 | 0 | SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U); |
11399 | 0 | return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi); |
11400 | 0 | }; |
11401 | |
|
11402 | 0 | bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen; |
11403 | 0 | bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen; |
11404 | | |
11405 | | // The other operand should be signed, but still prefer MULH when |
11406 | | // possible. |
11407 | 0 | if (RHSIsU && LHSIsS && !RHSIsS) |
11408 | 0 | Results.push_back(MakeMULPair(LHS, RHS)); |
11409 | 0 | else if (LHSIsU && RHSIsS && !LHSIsS) |
11410 | 0 | Results.push_back(MakeMULPair(RHS, LHS)); |
11411 | |
|
11412 | 0 | return; |
11413 | 0 | } |
11414 | 0 | [[fallthrough]]; |
11415 | 0 | } |
11416 | 1.10k | case ISD::ADD: |
11417 | 1.99k | case ISD::SUB: |
11418 | 1.99k | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11419 | 1.99k | "Unexpected custom legalisation"); |
11420 | 0 | Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); |
11421 | 1.99k | break; |
11422 | 784 | case ISD::SHL: |
11423 | 1.50k | case ISD::SRA: |
11424 | 2.89k | case ISD::SRL: |
11425 | 2.89k | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11426 | 2.89k | "Unexpected custom legalisation"); |
11427 | 2.89k | if (N->getOperand(1).getOpcode() != ISD::Constant) { |
11428 | | // If we can use a BSET instruction, allow default promotion to apply. |
11429 | 1.97k | if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && |
11430 | 1.97k | isOneConstant(N->getOperand(0))) |
11431 | 0 | break; |
11432 | 1.97k | Results.push_back(customLegalizeToWOp(N, DAG)); |
11433 | 1.97k | break; |
11434 | 1.97k | } |
11435 | | |
11436 | | // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is |
11437 | | // similar to customLegalizeToWOpWithSExt, but we must zero_extend the |
11438 | | // shift amount. |
11439 | 916 | if (N->getOpcode() == ISD::SHL) { |
11440 | 195 | SDLoc DL(N); |
11441 | 195 | SDValue NewOp0 = |
11442 | 195 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11443 | 195 | SDValue NewOp1 = |
11444 | 195 | DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11445 | 195 | SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); |
11446 | 195 | SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, |
11447 | 195 | DAG.getValueType(MVT::i32)); |
11448 | 195 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); |
11449 | 195 | } |
11450 | | |
11451 | 916 | break; |
11452 | 0 | case ISD::ROTL: |
11453 | 0 | case ISD::ROTR: |
11454 | 0 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11455 | 0 | "Unexpected custom legalisation"); |
11456 | 0 | assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() || |
11457 | 0 | Subtarget.hasVendorXTHeadBb()) && |
11458 | 0 | "Unexpected custom legalization"); |
11459 | 0 | if (!isa<ConstantSDNode>(N->getOperand(1)) && |
11460 | 0 | !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())) |
11461 | 0 | return; |
11462 | 0 | Results.push_back(customLegalizeToWOp(N, DAG)); |
11463 | 0 | break; |
11464 | 0 | case ISD::CTTZ: |
11465 | 0 | case ISD::CTTZ_ZERO_UNDEF: |
11466 | 0 | case ISD::CTLZ: |
11467 | 0 | case ISD::CTLZ_ZERO_UNDEF: { |
11468 | 0 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11469 | 0 | "Unexpected custom legalisation"); |
11470 | | |
11471 | 0 | SDValue NewOp0 = |
11472 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11473 | 0 | bool IsCTZ = |
11474 | 0 | N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; |
11475 | 0 | unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; |
11476 | 0 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); |
11477 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11478 | 0 | return; |
11479 | 0 | } |
11480 | 0 | case ISD::SDIV: |
11481 | 0 | case ISD::UDIV: |
11482 | 0 | case ISD::UREM: { |
11483 | 0 | MVT VT = N->getSimpleValueType(0); |
11484 | 0 | assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && |
11485 | 0 | Subtarget.is64Bit() && Subtarget.hasStdExtM() && |
11486 | 0 | "Unexpected custom legalisation"); |
11487 | | // Don't promote division/remainder by a constant, since we should expand |
11488 | | // those to a multiply by a magic constant. |
11489 | 0 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
11490 | 0 | if (N->getOperand(1).getOpcode() == ISD::Constant && |
11491 | 0 | !isIntDivCheap(N->getValueType(0), Attr)) |
11492 | 0 | return; |
11493 | | |
11494 | | // If the input is i32, use ANY_EXTEND since the W instructions don't read |
11495 | | // the upper 32 bits. For other types we need to sign or zero extend |
11496 | | // based on the opcode. |
11497 | 0 | unsigned ExtOpc = ISD::ANY_EXTEND; |
11498 | 0 | if (VT != MVT::i32) |
11499 | 0 | ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND |
11500 | 0 | : ISD::ZERO_EXTEND; |
11501 | |
|
11502 | 0 | Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); |
11503 | 0 | break; |
11504 | 0 | } |
11505 | 0 | case ISD::SADDO: { |
11506 | 0 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11507 | 0 | "Unexpected custom legalisation"); |
11508 | | |
11509 | | // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise |
11510 | | // use the default legalization. |
11511 | 0 | if (!isa<ConstantSDNode>(N->getOperand(1))) |
11512 | 0 | return; |
11513 | | |
11514 | 0 | SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11515 | 0 | SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11516 | 0 | SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS); |
11517 | 0 | Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, |
11518 | 0 | DAG.getValueType(MVT::i32)); |
11519 | |
|
11520 | 0 | SDValue Zero = DAG.getConstant(0, DL, MVT::i64); |
11521 | | |
11522 | | // For an addition, the result should be less than one of the operands (LHS) |
11523 | | // if and only if the other operand (RHS) is negative; otherwise there will |
11524 | | // be overflow. |
11525 | | // For a subtraction, the result should be less than one of the operands |
11526 | | // (LHS) if and only if the other operand (RHS) is (non-zero) positive; |
11527 | | // otherwise there will be overflow. |
11528 | 0 | EVT OType = N->getValueType(1); |
11529 | 0 | SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT); |
11530 | 0 | SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT); |
11531 | |
|
11532 | 0 | SDValue Overflow = |
11533 | 0 | DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS); |
11534 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11535 | 0 | Results.push_back(Overflow); |
11536 | 0 | return; |
11537 | 0 | } |
11538 | 1 | case ISD::UADDO: |
11539 | 1 | case ISD::USUBO: { |
11540 | 1 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11541 | 1 | "Unexpected custom legalisation"); |
11542 | 0 | bool IsAdd = N->getOpcode() == ISD::UADDO; |
11543 | | // Create an ADDW or SUBW. |
11544 | 1 | SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11545 | 1 | SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11546 | 1 | SDValue Res = |
11547 | 1 | DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); |
11548 | 1 | Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, |
11549 | 1 | DAG.getValueType(MVT::i32)); |
11550 | | |
11551 | 1 | SDValue Overflow; |
11552 | 1 | if (IsAdd && isOneConstant(RHS)) { |
11553 | | // Special case: uaddo X, 1 overflows exactly when the addition result is 0. |
11554 | | // The general case (X + C) < C is not necessarily beneficial. Although we |
11555 | | // reduce the live range of X, we may introduce the materialization of |
11556 | | // constant C, especially when the setcc result is used by a branch, and we |
11557 | | // have no instructions that compare against a constant and branch. |
11558 | 0 | Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, |
11559 | 0 | DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ); |
11560 | 1 | } else if (IsAdd && isAllOnesConstant(RHS)) { |
11561 | | // Special case: uaddo X, -1 overflows exactly when X != 0. |
11562 | 0 | Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0), |
11563 | 0 | DAG.getConstant(0, DL, MVT::i32), ISD::SETNE); |
11564 | 1 | } else { |
11565 | | // Sign extend the LHS and perform an unsigned compare with the ADDW |
11566 | | // result. Since the inputs are sign extended from i32, this is equivalent |
11567 | | // to comparing the lower 32 bits. |
11568 | 1 | LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11569 | 1 | Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, |
11570 | 1 | IsAdd ? ISD::SETULT : ISD::SETUGT); |
11571 | 1 | } |
11572 | | |
11573 | 1 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11574 | 1 | Results.push_back(Overflow); |
11575 | 1 | return; |
11576 | 1 | } |
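A worked scalar example of the three overflow checks above for 32-bit unsigned addition (uaddOverflows is an illustrative name): adding 1 overflows exactly when the result wraps to 0, adding -1 overflows unless X is 0, and in the general case the result is unsigned-less-than an operand, which is what the SETULT against the sign-extended LHS computes on the low 32 bits.

#include <cstdint>

bool uaddOverflows(uint32_t X, uint32_t C) {
  uint32_t Res = X + C;  // the low 32 bits of the ADDW result
  if (C == 1)
    return Res == 0;     // uaddo X, 1
  if (C == UINT32_MAX)
    return X != 0;       // uaddo X, -1
  return Res < X;        // general case: result below an operand
}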
11577 | 0 | case ISD::UADDSAT: |
11578 | 0 | case ISD::USUBSAT: { |
11579 | 0 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11580 | 0 | "Unexpected custom legalisation"); |
11581 | 0 | if (Subtarget.hasStdExtZbb()) { |
11582 | | // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using |
11583 | | // sign extend allows overflow of the lower 32 bits to be detected on |
11584 | | // the promoted size. |
11585 | 0 | SDValue LHS = |
11586 | 0 | DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11587 | 0 | SDValue RHS = |
11588 | 0 | DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11589 | 0 | SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); |
11590 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11591 | 0 | return; |
11592 | 0 | } |
11593 | | |
11594 | | // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom |
11595 | | // promotion for UADDO/USUBO. |
11596 | 0 | Results.push_back(expandAddSubSat(N, DAG)); |
11597 | 0 | return; |
11598 | 0 | } |
11599 | 0 | case ISD::ABS: { |
11600 | 0 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
11601 | 0 | "Unexpected custom legalisation"); |
11602 | | |
11603 | 0 | if (Subtarget.hasStdExtZbb()) { |
11604 | | // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. |
11605 | | // This allows us to remember that the result is sign extended. Expanding |
11606 | | // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. |
11607 | 0 | SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, |
11608 | 0 | N->getOperand(0)); |
11609 | 0 | SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); |
11610 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); |
11611 | 0 | return; |
11612 | 0 | } |
11613 | | |
11614 | | // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y) |
11615 | 0 | SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); |
11616 | | |
11617 | | // Freeze the source so we can increase its use count. |
11618 | 0 | Src = DAG.getFreeze(Src); |
11619 | | |
11620 | | // Copy sign bit to all bits using the sraiw pattern. |
11621 | 0 | SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src, |
11622 | 0 | DAG.getValueType(MVT::i32)); |
11623 | 0 | SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill, |
11624 | 0 | DAG.getConstant(31, DL, MVT::i64)); |
11625 | |
|
11626 | 0 | SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill); |
11627 | 0 | NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill); |
11628 | | |
11629 | | // NOTE: The result is only required to be anyextended, but sext is |
11630 | | // consistent with type legalization of sub. |
11631 | 0 | NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes, |
11632 | 0 | DAG.getValueType(MVT::i32)); |
11633 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); |
11634 | 0 | return; |
11635 | 0 | } |
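A scalar model of the sraiw/xor/subw expansion above, assuming an arithmetic right shift on signed values (absViaSignFill is an illustrative name): the shift copies the sign bit into every bit, and xor-then-subtract negates the value exactly when it was negative.

#include <cstdint>

int32_t absViaSignFill(int32_t X) {
  int32_t SignFill = X >> 31;       // all ones if X is negative, else zero
  return (X ^ SignFill) - SignFill; // complement-and-increment when negative
}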
11636 | 0 | case ISD::BITCAST: { |
11637 | 0 | EVT VT = N->getValueType(0); |
11638 | 0 | assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!"); |
11639 | 0 | SDValue Op0 = N->getOperand(0); |
11640 | 0 | EVT Op0VT = Op0.getValueType(); |
11641 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
11642 | 0 | if (VT == MVT::i16 && Op0VT == MVT::f16 && |
11643 | 0 | Subtarget.hasStdExtZfhminOrZhinxmin()) { |
11644 | 0 | SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); |
11645 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); |
11646 | 0 | } else if (VT == MVT::i16 && Op0VT == MVT::bf16 && |
11647 | 0 | Subtarget.hasStdExtZfbfmin()) { |
11648 | 0 | SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); |
11649 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); |
11650 | 0 | } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && |
11651 | 0 | Subtarget.hasStdExtFOrZfinx()) { |
11652 | 0 | SDValue FPConv = |
11653 | 0 | DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); |
11654 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); |
11655 | 0 | } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 && |
11656 | 0 | Subtarget.hasStdExtZfa()) { |
11657 | 0 | SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL, |
11658 | 0 | DAG.getVTList(MVT::i32, MVT::i32), Op0); |
11659 | 0 | SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, |
11660 | 0 | NewReg.getValue(0), NewReg.getValue(1)); |
11661 | 0 | Results.push_back(RetReg); |
11662 | 0 | } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && |
11663 | 0 | isTypeLegal(Op0VT)) { |
11664 | | // Custom-legalize bitcasts from fixed-length vector types to illegal |
11665 | | // scalar types in order to improve codegen. Bitcast the vector to a |
11666 | | // one-element vector type whose element type is the same as the result |
11667 | | // type, and extract the first element. |
11668 | 0 | EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); |
11669 | 0 | if (isTypeLegal(BVT)) { |
11670 | 0 | SDValue BVec = DAG.getBitcast(BVT, Op0); |
11671 | 0 | Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, |
11672 | 0 | DAG.getConstant(0, DL, XLenVT))); |
11673 | 0 | } |
11674 | 0 | } |
11675 | 0 | break; |
11676 | 0 | } |
11677 | 0 | case RISCVISD::BREV8: { |
11678 | 0 | MVT VT = N->getSimpleValueType(0); |
11679 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
11680 | 0 | assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && |
11681 | 0 | "Unexpected custom legalisation"); |
11682 | 0 | assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); |
11683 | 0 | SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0)); |
11684 | 0 | SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp); |
11685 | | // ReplaceNodeResults requires we maintain the same type for the return |
11686 | | // value. |
11687 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes)); |
11688 | 0 | break; |
11689 | 0 | } |
11690 | 0 | case ISD::EXTRACT_VECTOR_ELT: { |
11691 | | // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element |
11692 | | // type is illegal (currently only vXi64 RV32). |
11693 | | // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are |
11694 | | // transferred to the destination register. We issue two of these from the |
11695 | | // upper and lower halves of the SEW-bit vector element, slid down to the |
11696 | | // first element. |
11697 | 0 | SDValue Vec = N->getOperand(0); |
11698 | 0 | SDValue Idx = N->getOperand(1); |
11699 | | |
11700 | | // The vector type hasn't been legalized yet, so we can't issue |
11701 | | // target-specific nodes if it needs legalization. |
11702 | | // FIXME: We would manually legalize if it's important. |
11703 | 0 | if (!isTypeLegal(Vec.getValueType())) |
11704 | 0 | return; |
11705 | | |
11706 | 0 | MVT VecVT = Vec.getSimpleValueType(); |
11707 | |
|
11708 | 0 | assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && |
11709 | 0 | VecVT.getVectorElementType() == MVT::i64 && |
11710 | 0 | "Unexpected EXTRACT_VECTOR_ELT legalization"); |
11711 | | |
11712 | | // If this is a fixed vector, we need to convert it to a scalable vector. |
11713 | 0 | MVT ContainerVT = VecVT; |
11714 | 0 | if (VecVT.isFixedLengthVector()) { |
11715 | 0 | ContainerVT = getContainerForFixedLengthVector(VecVT); |
11716 | 0 | Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); |
11717 | 0 | } |
11718 | |
|
11719 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
11720 | | |
11721 | | // Use a VL of 1 to avoid processing more elements than we need. |
11722 | 0 | auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); |
11723 | | |
11724 | | // Unless the index is known to be 0, we must slide the vector down to get |
11725 | | // the desired element into index 0. |
11726 | 0 | if (!isNullConstant(Idx)) { |
11727 | 0 | Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, |
11728 | 0 | DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); |
11729 | 0 | } |
11730 | | |
11731 | | // Extract the lower XLEN bits of the correct vector element. |
11732 | 0 | SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); |
11733 | | |
11734 | | // To extract the upper XLEN bits of the vector element, shift the first |
11735 | | // element right by 32 bits and re-extract the lower XLEN bits. |
11736 | 0 | SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, |
11737 | 0 | DAG.getUNDEF(ContainerVT), |
11738 | 0 | DAG.getConstant(32, DL, XLenVT), VL); |
11739 | 0 | SDValue LShr32 = |
11740 | 0 | DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV, |
11741 | 0 | DAG.getUNDEF(ContainerVT), Mask, VL); |
11742 | |
11743 | 0 | SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); |
11744 | |
11745 | 0 | Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); |
11746 | 0 | break; |
11747 | 0 | } |
11748 | 0 | case ISD::INTRINSIC_WO_CHAIN: { |
11749 | 0 | unsigned IntNo = N->getConstantOperandVal(0); |
11750 | 0 | switch (IntNo) { |
11751 | 0 | default: |
11752 | 0 | llvm_unreachable( |
11753 | 0 | "Don't know how to custom type legalize this intrinsic!"); |
11754 | 0 | case Intrinsic::experimental_get_vector_length: { |
11755 | 0 | SDValue Res = lowerGetVectorLength(N, DAG, Subtarget); |
11756 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11757 | 0 | return; |
11758 | 0 | } |
11759 | 0 | case Intrinsic::riscv_orc_b: |
11760 | 0 | case Intrinsic::riscv_brev8: |
11761 | 0 | case Intrinsic::riscv_sha256sig0: |
11762 | 0 | case Intrinsic::riscv_sha256sig1: |
11763 | 0 | case Intrinsic::riscv_sha256sum0: |
11764 | 0 | case Intrinsic::riscv_sha256sum1: |
11765 | 0 | case Intrinsic::riscv_sm3p0: |
11766 | 0 | case Intrinsic::riscv_sm3p1: { |
11767 | 0 | if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) |
11768 | 0 | return; |
11769 | 0 | unsigned Opc; |
11770 | 0 | switch (IntNo) { |
11771 | 0 | case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break; |
11772 | 0 | case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break; |
11773 | 0 | case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break; |
11774 | 0 | case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break; |
11775 | 0 | case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break; |
11776 | 0 | case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break; |
11777 | 0 | case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break; |
11778 | 0 | case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break; |
11779 | 0 | } |
11780 | | |
11781 | 0 | SDValue NewOp = |
11782 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11783 | 0 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp); |
11784 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11785 | 0 | return; |
11786 | 0 | } |
11787 | 0 | case Intrinsic::riscv_sm4ks: |
11788 | 0 | case Intrinsic::riscv_sm4ed: { |
11789 | 0 | unsigned Opc = |
11790 | 0 | IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED; |
11791 | 0 | SDValue NewOp0 = |
11792 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11793 | 0 | SDValue NewOp1 = |
11794 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); |
11795 | 0 | SDValue Res = |
11796 | 0 | DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3)); |
11797 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11798 | 0 | return; |
11799 | 0 | } |
11800 | 0 | case Intrinsic::riscv_clmul: { |
11801 | 0 | if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) |
11802 | 0 | return; |
11803 | | |
11804 | 0 | SDValue NewOp0 = |
11805 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11806 | 0 | SDValue NewOp1 = |
11807 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); |
11808 | 0 | SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1); |
11809 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11810 | 0 | return; |
11811 | 0 | } |
11812 | 0 | case Intrinsic::riscv_clmulh: |
11813 | 0 | case Intrinsic::riscv_clmulr: { |
11814 | 0 | if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32) |
11815 | 0 | return; |
11816 | | |
11817 | | // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros |
11818 | | // to the full 128-bit clmul result of multiplying two xlen values. |
11819 | | // Perform clmulr or clmulh on the shifted values. Finally, extract the |
11820 | | // upper 32 bits. |
11821 | | // |
11822 | | // The alternative is to mask the inputs to 32 bits and use clmul, but |
11823 | | // that requires two shifts to mask each input without zext.w. |
11824 | | // FIXME: If the inputs are known zero extended or could be freely |
11825 | | // zero extended, the mask form would be better. |
11826 | 0 | SDValue NewOp0 = |
11827 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); |
11828 | 0 | SDValue NewOp1 = |
11829 | 0 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); |
11830 | 0 | NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, |
11831 | 0 | DAG.getConstant(32, DL, MVT::i64)); |
11832 | 0 | NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1, |
11833 | 0 | DAG.getConstant(32, DL, MVT::i64)); |
11834 | 0 | unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH |
11835 | 0 | : RISCVISD::CLMULR; |
11836 | 0 | SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1); |
11837 | 0 | Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res, |
11838 | 0 | DAG.getConstant(32, DL, MVT::i64)); |
11839 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); |
11840 | 0 | return; |
11841 | 0 | } |
11842 | 0 | case Intrinsic::riscv_vmv_x_s: { |
11843 | 0 | EVT VT = N->getValueType(0); |
11844 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
11845 | 0 | if (VT.bitsLT(XLenVT)) { |
11846 | | // Simple case: just extract using vmv.x.s and truncate.
11847 | 0 | SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, |
11848 | 0 | Subtarget.getXLenVT(), N->getOperand(1)); |
11849 | 0 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); |
11850 | 0 | return; |
11851 | 0 | } |
11852 | | |
11853 | 0 | assert(VT == MVT::i64 && !Subtarget.is64Bit() && |
11854 | 0 | "Unexpected custom legalization"); |
11855 | | |
11856 | | // We need to do the move in two steps. |
11857 | 0 | SDValue Vec = N->getOperand(1); |
11858 | 0 | MVT VecVT = Vec.getSimpleValueType(); |
11859 | | |
11860 | | // First extract the lower XLEN bits of the element. |
11861 | 0 | SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); |
11862 | | |
11863 | | // To extract the upper XLEN bits of the vector element, shift the first |
11864 | | // element right by 32 bits and re-extract the lower XLEN bits. |
11865 | 0 | auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget); |
11866 | |
11867 | 0 | SDValue ThirtyTwoV = |
11868 | 0 | DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), |
11869 | 0 | DAG.getConstant(32, DL, XLenVT), VL); |
11870 | 0 | SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, |
11871 | 0 | DAG.getUNDEF(VecVT), Mask, VL); |
11872 | 0 | SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); |
11873 | |
11874 | 0 | Results.push_back( |
11875 | 0 | DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); |
11876 | 0 | break; |
11877 | 0 | } |
11878 | 0 | } |
11879 | 0 | break; |
11880 | 0 | } |
11881 | 0 | case ISD::VECREDUCE_ADD: |
11882 | 0 | case ISD::VECREDUCE_AND: |
11883 | 0 | case ISD::VECREDUCE_OR: |
11884 | 0 | case ISD::VECREDUCE_XOR: |
11885 | 0 | case ISD::VECREDUCE_SMAX: |
11886 | 0 | case ISD::VECREDUCE_UMAX: |
11887 | 0 | case ISD::VECREDUCE_SMIN: |
11888 | 0 | case ISD::VECREDUCE_UMIN: |
11889 | 0 | if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) |
11890 | 0 | Results.push_back(V); |
11891 | 0 | break; |
11892 | 0 | case ISD::VP_REDUCE_ADD: |
11893 | 0 | case ISD::VP_REDUCE_AND: |
11894 | 0 | case ISD::VP_REDUCE_OR: |
11895 | 0 | case ISD::VP_REDUCE_XOR: |
11896 | 0 | case ISD::VP_REDUCE_SMAX: |
11897 | 0 | case ISD::VP_REDUCE_UMAX: |
11898 | 0 | case ISD::VP_REDUCE_SMIN: |
11899 | 0 | case ISD::VP_REDUCE_UMIN: |
11900 | 0 | if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG)) |
11901 | 0 | Results.push_back(V); |
11902 | 0 | break; |
11903 | 0 | case ISD::GET_ROUNDING: { |
11904 | 0 | SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other); |
11905 | 0 | SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0)); |
11906 | 0 | Results.push_back(Res.getValue(0)); |
11907 | 0 | Results.push_back(Res.getValue(1)); |
11908 | 0 | break; |
11909 | 0 | } |
11910 | 19.3k | } |
11911 | 19.3k | } |
11912 | | |
11913 | | /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP |
11914 | | /// which corresponds to it. |
11915 | 0 | static unsigned getVecReduceOpcode(unsigned Opc) { |
11916 | 0 | switch (Opc) { |
11917 | 0 | default: |
11918 | 0 | llvm_unreachable("Unhandled binary to transform reduction");
11919 | 0 | case ISD::ADD: |
11920 | 0 | return ISD::VECREDUCE_ADD; |
11921 | 0 | case ISD::UMAX: |
11922 | 0 | return ISD::VECREDUCE_UMAX; |
11923 | 0 | case ISD::SMAX: |
11924 | 0 | return ISD::VECREDUCE_SMAX; |
11925 | 0 | case ISD::UMIN: |
11926 | 0 | return ISD::VECREDUCE_UMIN; |
11927 | 0 | case ISD::SMIN: |
11928 | 0 | return ISD::VECREDUCE_SMIN; |
11929 | 0 | case ISD::AND: |
11930 | 0 | return ISD::VECREDUCE_AND; |
11931 | 0 | case ISD::OR: |
11932 | 0 | return ISD::VECREDUCE_OR; |
11933 | 0 | case ISD::XOR: |
11934 | 0 | return ISD::VECREDUCE_XOR; |
11935 | 0 | case ISD::FADD: |
11936 | | // Note: This is the associative form of the generic reduction opcode. |
11937 | 0 | return ISD::VECREDUCE_FADD; |
11938 | 0 | } |
11939 | 0 | } |
11940 | | |
11941 | | /// Perform two related transforms whose purpose is to incrementally recognize |
11942 | | /// an explode_vector followed by scalar reduction as a vector reduction node. |
11943 | | /// This exists to recover from a deficiency in SLP which can't handle |
11944 | | /// forests with multiple roots sharing common nodes. In some cases, one |
11945 | | /// of the trees will be vectorized, and the other will remain (unprofitably) |
11946 | | /// scalarized. |
11947 | | static SDValue |
11948 | | combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, |
11949 | 297k | const RISCVSubtarget &Subtarget) { |
11950 | | |
11951 | | // This transform needs to run before all integer types have been legalized
11952 | | // to i64 (so that the vector element type matches the add type), and while |
11953 | | // it's safe to introduce odd sized vector types. |
11954 | 297k | if (DAG.NewNodesMustHaveLegalTypes) |
11955 | 209k | return SDValue(); |
11956 | | |
11957 | | // Without V, this transform isn't useful. We could form the (illegal) |
11958 | | // operations and let them be scalarized again, but there's really no point. |
11959 | 88.2k | if (!Subtarget.hasVInstructions()) |
11960 | 88.2k | return SDValue(); |
11961 | | |
11962 | 0 | const SDLoc DL(N); |
11963 | 0 | const EVT VT = N->getValueType(0); |
11964 | 0 | const unsigned Opc = N->getOpcode(); |
11965 | | |
11966 | | // For FADD, we only handle the case with reassociation allowed. We |
11967 | | // could handle strict reduction order, but at the moment, there's no |
11968 | | // known reason to, and the complexity isn't worth it. |
11969 | | // TODO: Handle fminnum and fmaxnum here |
11970 | 0 | if (!VT.isInteger() && |
11971 | 0 | (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation())) |
11972 | 0 | return SDValue(); |
11973 | | |
11974 | 0 | const unsigned ReduceOpc = getVecReduceOpcode(Opc); |
11975 | 0 | assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) && |
11976 | 0 | "Inconsistent mappings"); |
11977 | 0 | SDValue LHS = N->getOperand(0); |
11978 | 0 | SDValue RHS = N->getOperand(1); |
11979 | |
11980 | 0 | if (!LHS.hasOneUse() || !RHS.hasOneUse()) |
11981 | 0 | return SDValue(); |
11982 | | |
11983 | 0 | if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
11984 | 0 | std::swap(LHS, RHS); |
11985 | |
|
11986 | 0 | if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
11987 | 0 | !isa<ConstantSDNode>(RHS.getOperand(1))) |
11988 | 0 | return SDValue(); |
11989 | | |
11990 | 0 | uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue(); |
11991 | 0 | SDValue SrcVec = RHS.getOperand(0); |
11992 | 0 | EVT SrcVecVT = SrcVec.getValueType(); |
11993 | 0 | assert(SrcVecVT.getVectorElementType() == VT); |
11994 | 0 | if (SrcVecVT.isScalableVector()) |
11995 | 0 | return SDValue(); |
11996 | | |
11997 | 0 | if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen()) |
11998 | 0 | return SDValue(); |
11999 | | |
12000 | | // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to |
12001 | | // reduce_op (extract_subvector [2 x VT] from V). This will form the |
12002 | | // root of our reduction tree. TODO: We could extend this to any two |
12003 | | // adjacent aligned constant indices if desired. |
12004 | 0 | if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
12005 | 0 | LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) { |
12006 | 0 | uint64_t LHSIdx = |
12007 | 0 | cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue(); |
12008 | 0 | if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) { |
12009 | 0 | EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2); |
12010 | 0 | SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, |
12011 | 0 | DAG.getVectorIdxConstant(0, DL)); |
12012 | 0 | return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags()); |
12013 | 0 | } |
12014 | 0 | } |
12015 | | |
12016 | | // Match (binop (reduce (extract_subvector V, 0), |
12017 | | // (extract_vector_elt V, sizeof(SubVec)))) |
12018 | | // into a reduction of one more element from the original vector V. |
12019 | 0 | if (LHS.getOpcode() != ReduceOpc) |
12020 | 0 | return SDValue(); |
12021 | | |
12022 | 0 | SDValue ReduceVec = LHS.getOperand(0); |
12023 | 0 | if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
12024 | 0 | ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) && |
12025 | 0 | isNullConstant(ReduceVec.getOperand(1)) && |
12026 | 0 | ReduceVec.getValueType().getVectorNumElements() == RHSIdx) { |
12027 | | // For illegal types (e.g. 3xi32), most will be combined again into a |
12028 | | // wider (hopefully legal) type. If this is a terminal state, we are |
12029 | | // relying on type legalization here to produce something reasonable |
12030 | | // and this lowering quality could probably be improved. (TODO) |
12031 | 0 | EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1); |
12032 | 0 | SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec, |
12033 | 0 | DAG.getVectorIdxConstant(0, DL)); |
12034 | 0 | auto Flags = ReduceVec->getFlags(); |
12035 | 0 | Flags.intersectWith(N->getFlags()); |
12036 | 0 | return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags); |
12037 | 0 | } |
12038 | | |
12039 | 0 | return SDValue(); |
12040 | 0 | } |
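      | | // Editor's note: a worked example of the two patterns above (illustrative
      | | // only, not part of the source). For a hypothetical <4 x i32> vector V and a
      | | // chain of scalar adds over its lanes:
      | | //   (add (extractelt V, 0), (extractelt V, 1))
      | | //     -> (vecreduce_add (extract_subvector <2 x i32> V, 0))
      | | //   (add (vecreduce_add (extract_subvector <2 x i32> V, 0)), (extractelt V, 2))
      | | //     -> (vecreduce_add (extract_subvector <3 x i32> V, 0))
      | | // so repeated application grows the reduction one lane at a time until the
      | | // whole explode_vector is recognized as a single reduction.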
12041 | | |
12042 | | |
12043 | | // Try to fold (<bop> x, (reduction.<bop> vec, start)) |
12044 | | static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, |
12045 | 297k | const RISCVSubtarget &Subtarget) { |
12046 | 297k | auto BinOpToRVVReduce = [](unsigned Opc) { |
12047 | 0 | switch (Opc) { |
12048 | 0 | default: |
12049 | 0 | llvm_unreachable("Unhandled binary to transform reduction");
12050 | 0 | case ISD::ADD: |
12051 | 0 | return RISCVISD::VECREDUCE_ADD_VL; |
12052 | 0 | case ISD::UMAX: |
12053 | 0 | return RISCVISD::VECREDUCE_UMAX_VL; |
12054 | 0 | case ISD::SMAX: |
12055 | 0 | return RISCVISD::VECREDUCE_SMAX_VL; |
12056 | 0 | case ISD::UMIN: |
12057 | 0 | return RISCVISD::VECREDUCE_UMIN_VL; |
12058 | 0 | case ISD::SMIN: |
12059 | 0 | return RISCVISD::VECREDUCE_SMIN_VL; |
12060 | 0 | case ISD::AND: |
12061 | 0 | return RISCVISD::VECREDUCE_AND_VL; |
12062 | 0 | case ISD::OR: |
12063 | 0 | return RISCVISD::VECREDUCE_OR_VL; |
12064 | 0 | case ISD::XOR: |
12065 | 0 | return RISCVISD::VECREDUCE_XOR_VL; |
12066 | 0 | case ISD::FADD: |
12067 | 0 | return RISCVISD::VECREDUCE_FADD_VL; |
12068 | 0 | case ISD::FMAXNUM: |
12069 | 0 | return RISCVISD::VECREDUCE_FMAX_VL; |
12070 | 0 | case ISD::FMINNUM: |
12071 | 0 | return RISCVISD::VECREDUCE_FMIN_VL; |
12072 | 0 | } |
12073 | 0 | }; |
12074 | | |
12075 | 594k | auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { |
12076 | 594k | return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
12077 | 594k | isNullConstant(V.getOperand(1)) && |
12078 | 594k | V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc); |
12079 | 594k | }; |
12080 | | |
12081 | 297k | unsigned Opc = N->getOpcode(); |
12082 | 297k | unsigned ReduceIdx; |
12083 | 297k | if (IsReduction(N->getOperand(0), Opc)) |
12084 | 0 | ReduceIdx = 0; |
12085 | 297k | else if (IsReduction(N->getOperand(1), Opc)) |
12086 | 0 | ReduceIdx = 1; |
12087 | 297k | else |
12088 | 297k | return SDValue(); |
12089 | | |
12090 | | // Skip if FADD disallows reassociation but the combiner needs it.
12091 | 0 | if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) |
12092 | 0 | return SDValue(); |
12093 | | |
12094 | 0 | SDValue Extract = N->getOperand(ReduceIdx); |
12095 | 0 | SDValue Reduce = Extract.getOperand(0); |
12096 | 0 | if (!Extract.hasOneUse() || !Reduce.hasOneUse()) |
12097 | 0 | return SDValue(); |
12098 | | |
12099 | 0 | SDValue ScalarV = Reduce.getOperand(2); |
12100 | 0 | EVT ScalarVT = ScalarV.getValueType(); |
12101 | 0 | if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR && |
12102 | 0 | ScalarV.getOperand(0)->isUndef() && |
12103 | 0 | isNullConstant(ScalarV.getOperand(2))) |
12104 | 0 | ScalarV = ScalarV.getOperand(1); |
12105 | | |
12106 | | // Make sure that ScalarV is a splat with VL=1. |
12107 | 0 | if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && |
12108 | 0 | ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && |
12109 | 0 | ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) |
12110 | 0 | return SDValue(); |
12111 | | |
12112 | 0 | if (!isNonZeroAVL(ScalarV.getOperand(2))) |
12113 | 0 | return SDValue(); |
12114 | | |
12115 | | // Check that the scalar of ScalarV is the neutral element.
12116 | | // TODO: Deal with values other than the neutral element.
12117 | 0 | if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1), |
12118 | 0 | 0)) |
12119 | 0 | return SDValue(); |
12120 | | |
12121 | | // If the AVL is zero, operand 0 will be returned. So it's not safe to fold. |
12122 | | // FIXME: We might be able to improve this if operand 0 is undef. |
12123 | 0 | if (!isNonZeroAVL(Reduce.getOperand(5))) |
12124 | 0 | return SDValue(); |
12125 | | |
12126 | 0 | SDValue NewStart = N->getOperand(1 - ReduceIdx); |
12127 | |
12128 | 0 | SDLoc DL(N); |
12129 | 0 | SDValue NewScalarV = |
12130 | 0 | lowerScalarInsert(NewStart, ScalarV.getOperand(2), |
12131 | 0 | ScalarV.getSimpleValueType(), DL, DAG, Subtarget); |
12132 | | |
12133 | | // If we looked through an INSERT_SUBVECTOR we need to restore it. |
12134 | 0 | if (ScalarVT != ScalarV.getValueType()) |
12135 | 0 | NewScalarV = |
12136 | 0 | DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT), |
12137 | 0 | NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT())); |
12138 | |
12139 | 0 | SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1), |
12140 | 0 | NewScalarV, Reduce.getOperand(3), |
12141 | 0 | Reduce.getOperand(4), Reduce.getOperand(5)}; |
12142 | 0 | SDValue NewReduce = |
12143 | 0 | DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops); |
12144 | 0 | return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce, |
12145 | 0 | Extract.getOperand(1)); |
12146 | 0 | } |
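      | | // Editor's note (illustrative sketch, not from the source; the spelling below
      | | // is schematic rather than the exact VL-node operand layout). With the
      | | // reduction's start value being the neutral element (0 for add), the fold is:
      | | //   (add x, (extractelt (vecreduce.add vec, start=0), 0))
      | | //     -> (extractelt (vecreduce.add vec, start=x), 0)
      | | // i.e. the scalar operand x is absorbed into the reduction's start value,
      | | // removing the trailing scalar add.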
12147 | | |
12148 | | // Optimize (add (shl x, c0), (shl y, c1)) -> |
12149 | | // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. |
12150 | | static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, |
12151 | 80.5k | const RISCVSubtarget &Subtarget) { |
12152 | | // Perform this optimization only in the zba extension. |
12153 | 80.5k | if (!Subtarget.hasStdExtZba()) |
12154 | 80.5k | return SDValue(); |
12155 | | |
12156 | | // Skip for vector types and larger types. |
12157 | 0 | EVT VT = N->getValueType(0); |
12158 | 0 | if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) |
12159 | 0 | return SDValue(); |
12160 | | |
12161 | | // The two operand nodes must be SHL and have no other use. |
12162 | 0 | SDValue N0 = N->getOperand(0); |
12163 | 0 | SDValue N1 = N->getOperand(1); |
12164 | 0 | if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || |
12165 | 0 | !N0->hasOneUse() || !N1->hasOneUse()) |
12166 | 0 | return SDValue(); |
12167 | | |
12168 | | // Check c0 and c1. |
12169 | 0 | auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); |
12170 | 0 | auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1)); |
12171 | 0 | if (!N0C || !N1C) |
12172 | 0 | return SDValue(); |
12173 | 0 | int64_t C0 = N0C->getSExtValue(); |
12174 | 0 | int64_t C1 = N1C->getSExtValue(); |
12175 | 0 | if (C0 <= 0 || C1 <= 0) |
12176 | 0 | return SDValue(); |
12177 | | |
12178 | | // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. |
12179 | 0 | int64_t Bits = std::min(C0, C1); |
12180 | 0 | int64_t Diff = std::abs(C0 - C1); |
12181 | 0 | if (Diff != 1 && Diff != 2 && Diff != 3) |
12182 | 0 | return SDValue(); |
12183 | | |
12184 | | // Build nodes. |
12185 | 0 | SDLoc DL(N); |
12186 | 0 | SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0); |
12187 | 0 | SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0); |
12188 | 0 | SDValue NA0 = |
12189 | 0 | DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT)); |
12190 | 0 | SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS); |
12191 | 0 | return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT)); |
12192 | 0 | } |
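      | | // Editor's note: worked example for the rewrite above (assumes Zba; not part
      | | // of the source). For (add (shl x, 5), (shl y, 7)): C0 = 5, C1 = 7, so
      | | // Diff = 2 and Bits = 5, giving
      | | //   (shl (add (shl y, 2), x), 5)
      | | // which selects to SH2ADD y, x followed by an SLLI by 5; both forms compute
      | | // 32*x + 128*y.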
12193 | | |
12194 | | // Combine a constant select operand into its use: |
12195 | | // |
12196 | | // (and (select cond, -1, c), x) |
12197 | | // -> (select cond, x, (and x, c)) [AllOnes=1] |
12198 | | // (or (select cond, 0, c), x) |
12199 | | // -> (select cond, x, (or x, c)) [AllOnes=0] |
12200 | | // (xor (select cond, 0, c), x) |
12201 | | // -> (select cond, x, (xor x, c)) [AllOnes=0] |
12202 | | // (add (select cond, 0, c), x) |
12203 | | // -> (select cond, x, (add x, c)) [AllOnes=0] |
12204 | | // (sub x, (select cond, 0, c)) |
12205 | | // -> (select cond, x, (sub x, c)) [AllOnes=0] |
12206 | | static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, |
12207 | | SelectionDAG &DAG, bool AllOnes, |
12208 | 627k | const RISCVSubtarget &Subtarget) { |
12209 | 627k | EVT VT = N->getValueType(0); |
12210 | | |
12211 | | // Skip vectors. |
12212 | 627k | if (VT.isVector()) |
12213 | 0 | return SDValue(); |
12214 | | |
12215 | 627k | if (!Subtarget.hasConditionalMoveFusion()) { |
12216 | | // (select cond, x, (and x, c)) has custom lowering with Zicond. |
12217 | 627k | if ((!Subtarget.hasStdExtZicond() && |
12218 | 627k | !Subtarget.hasVendorXVentanaCondOps()) || |
12219 | 627k | N->getOpcode() != ISD::AND) |
12220 | 627k | return SDValue(); |
12221 | | |
12222 | | // Maybe harmful when condition code has multiple use. |
12223 | 0 | if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse()) |
12224 | 0 | return SDValue(); |
12225 | | |
12226 | | // Maybe harmful when VT is wider than XLen. |
12227 | 0 | if (VT.getSizeInBits() > Subtarget.getXLen()) |
12228 | 0 | return SDValue(); |
12229 | 0 | } |
12230 | | |
12231 | 0 | if ((Slct.getOpcode() != ISD::SELECT && |
12232 | 0 | Slct.getOpcode() != RISCVISD::SELECT_CC) || |
12233 | 0 | !Slct.hasOneUse()) |
12234 | 0 | return SDValue(); |
12235 | | |
12236 | 0 | auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) { |
12237 | 0 | return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); |
12238 | 0 | }; |
12239 | |
12240 | 0 | bool SwapSelectOps; |
12241 | 0 | unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0; |
12242 | 0 | SDValue TrueVal = Slct.getOperand(1 + OpOffset); |
12243 | 0 | SDValue FalseVal = Slct.getOperand(2 + OpOffset); |
12244 | 0 | SDValue NonConstantVal; |
12245 | 0 | if (isZeroOrAllOnes(TrueVal, AllOnes)) { |
12246 | 0 | SwapSelectOps = false; |
12247 | 0 | NonConstantVal = FalseVal; |
12248 | 0 | } else if (isZeroOrAllOnes(FalseVal, AllOnes)) { |
12249 | 0 | SwapSelectOps = true; |
12250 | 0 | NonConstantVal = TrueVal; |
12251 | 0 | } else |
12252 | 0 | return SDValue(); |
12253 | | |
12254 | | // Slct is now known to be the desired identity constant when CC is true.
12255 | 0 | TrueVal = OtherOp; |
12256 | 0 | FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); |
12257 | | // Unless SwapSelectOps says the condition should be false. |
12258 | 0 | if (SwapSelectOps) |
12259 | 0 | std::swap(TrueVal, FalseVal); |
12260 | |
12261 | 0 | if (Slct.getOpcode() == RISCVISD::SELECT_CC) |
12262 | 0 | return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT, |
12263 | 0 | {Slct.getOperand(0), Slct.getOperand(1), |
12264 | 0 | Slct.getOperand(2), TrueVal, FalseVal}); |
12265 | | |
12266 | 0 | return DAG.getNode(ISD::SELECT, SDLoc(N), VT, |
12267 | 0 | {Slct.getOperand(0), TrueVal, FalseVal}); |
12268 | 0 | } |
12269 | | |
12270 | | // Attempt combineSelectAndUse on each operand of a commutative operator N. |
12271 | | static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, |
12272 | | bool AllOnes, |
12273 | 297k | const RISCVSubtarget &Subtarget) { |
12274 | 297k | SDValue N0 = N->getOperand(0); |
12275 | 297k | SDValue N1 = N->getOperand(1); |
12276 | 297k | if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget)) |
12277 | 0 | return Result; |
12278 | 297k | if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget)) |
12279 | 0 | return Result; |
12280 | 297k | return SDValue(); |
12281 | 297k | } |
12282 | | |
12283 | | // Transform (add (mul x, c0), c1) -> |
12284 | | // (add (mul (add x, c1/c0), c0), c1%c0). |
12285 | | // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case |
12286 | | // that should be excluded is when c0*(c1/c0) is simm12, which will lead |
12287 | | // to an infinite loop in DAGCombine if transformed. |
12288 | | // Or transform (add (mul x, c0), c1) -> |
12289 | | // (add (mul (add x, c1/c0+1), c0), c1%c0-c0), |
12290 | | // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner |
12291 | | // case that should be excluded is when c0*(c1/c0+1) is simm12, which will |
12292 | | // lead to an infinite loop in DAGCombine if transformed. |
12293 | | // Or transform (add (mul x, c0), c1) -> |
12294 | | // (add (mul (add x, c1/c0-1), c0), c1%c0+c0), |
12295 | | // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner |
12296 | | // case that should be excluded is when c0*(c1/c0-1) is simm12, which will |
12297 | | // lead to an infinite loop in DAGCombine if transformed. |
12298 | | // Or transform (add (mul x, c0), c1) -> |
12299 | | // (mul (add x, c1/c0), c0). |
12300 | | // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. |
12301 | | static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, |
12302 | 80.5k | const RISCVSubtarget &Subtarget) { |
12303 | | // Skip for vector types and larger types. |
12304 | 80.5k | EVT VT = N->getValueType(0); |
12305 | 80.5k | if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) |
12306 | 0 | return SDValue(); |
12307 | | // The first operand node must be a MUL and has no other use. |
12308 | 80.5k | SDValue N0 = N->getOperand(0); |
12309 | 80.5k | if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) |
12310 | 79.6k | return SDValue(); |
12311 | | // Check if c0 and c1 match above conditions. |
12312 | 910 | auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); |
12313 | 910 | auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
12314 | 910 | if (!N0C || !N1C) |
12315 | 12 | return SDValue(); |
12316 | | // If N0C has multiple uses it's possible one of the cases in |
12317 | | // DAGCombiner::isMulAddWithConstProfitable will be true, which would result |
12318 | | // in an infinite loop. |
12319 | 898 | if (!N0C->hasOneUse()) |
12320 | 26 | return SDValue(); |
12321 | 872 | int64_t C0 = N0C->getSExtValue(); |
12322 | 872 | int64_t C1 = N1C->getSExtValue(); |
12323 | 872 | int64_t CA, CB; |
12324 | 872 | if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1)) |
12325 | 872 | return SDValue(); |
12326 | | // Search for proper CA (non-zero) and CB that both are simm12. |
12327 | 0 | if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) && |
12328 | 0 | !isInt<12>(C0 * (C1 / C0))) { |
12329 | 0 | CA = C1 / C0; |
12330 | 0 | CB = C1 % C0; |
12331 | 0 | } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) && |
12332 | 0 | isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) { |
12333 | 0 | CA = C1 / C0 + 1; |
12334 | 0 | CB = C1 % C0 - C0; |
12335 | 0 | } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) && |
12336 | 0 | isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) { |
12337 | 0 | CA = C1 / C0 - 1; |
12338 | 0 | CB = C1 % C0 + C0; |
12339 | 0 | } else |
12340 | 0 | return SDValue(); |
12341 | | // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). |
12342 | 0 | SDLoc DL(N); |
12343 | 0 | SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0), |
12344 | 0 | DAG.getConstant(CA, DL, VT)); |
12345 | 0 | SDValue New1 = |
12346 | 0 | DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT)); |
12347 | 0 | return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); |
12348 | 0 | } |
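      | | // Editor's note: worked example of the first case above (illustrative only).
      | | // For (add (mul x, 100), 4097): C1 = 4097 is not simm12, but C1/C0 = 40 and
      | | // C1%C0 = 97 both are, and C0*(C1/C0) = 4000 is not, so the transform gives
      | | //   (add (mul (add x, 40), 100), 97)  ==  100*x + 4000 + 97  ==  100*x + 4097
      | | // where every remaining constant fits in a 12-bit immediate.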
12349 | | |
12350 | | // Try to turn (add (xor bool, 1) -1) into (neg bool). |
12351 | 80.5k | static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) { |
12352 | 80.5k | SDValue N0 = N->getOperand(0); |
12353 | 80.5k | SDValue N1 = N->getOperand(1); |
12354 | 80.5k | EVT VT = N->getValueType(0); |
12355 | 80.5k | SDLoc DL(N); |
12356 | | |
12357 | | // RHS should be -1. |
12358 | 80.5k | if (!isAllOnesConstant(N1)) |
12359 | 77.2k | return SDValue(); |
12360 | | |
12361 | | // Look for (xor X, 1). |
12362 | 3.32k | if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1))) |
12363 | 3.32k | return SDValue(); |
12364 | | |
12365 | | // First xor input should be 0 or 1. |
12366 | 3 | APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); |
12367 | 3 | if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask)) |
12368 | 3 | return SDValue(); |
12369 | | |
12370 | | // Emit a negate of the setcc. |
12371 | 0 | return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
12372 | 0 | N0.getOperand(0)); |
12373 | 3 | } |
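      | | // Editor's note (illustrative): with x known to be 0 or 1, (xor x, 1) equals
      | | // 1 - x, so (add (xor x, 1), -1) equals -x and the combine emits the
      | | // equivalent (sub 0, x), i.e. a single negate.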
12374 | | |
12375 | | static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, |
12376 | 80.5k | const RISCVSubtarget &Subtarget) { |
12377 | 80.5k | if (SDValue V = combineAddOfBooleanXor(N, DAG)) |
12378 | 0 | return V; |
12379 | 80.5k | if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) |
12380 | 0 | return V; |
12381 | 80.5k | if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) |
12382 | 0 | return V; |
12383 | 80.5k | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
12384 | 0 | return V; |
12385 | 80.5k | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
12386 | 0 | return V; |
12387 | | |
12388 | | // fold (add (select lhs, rhs, cc, 0, y), x) -> |
12389 | | // (select lhs, rhs, cc, x, (add x, y)) |
12390 | 80.5k | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
12391 | 80.5k | } |
12392 | | |
12393 | | // Try to turn a sub boolean RHS and constant LHS into an addi. |
12394 | 32.5k | static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) { |
12395 | 32.5k | SDValue N0 = N->getOperand(0); |
12396 | 32.5k | SDValue N1 = N->getOperand(1); |
12397 | 32.5k | EVT VT = N->getValueType(0); |
12398 | 32.5k | SDLoc DL(N); |
12399 | | |
12400 | | // Require a constant LHS. |
12401 | 32.5k | auto *N0C = dyn_cast<ConstantSDNode>(N0); |
12402 | 32.5k | if (!N0C) |
12403 | 23.2k | return SDValue(); |
12404 | | |
12405 | | // All our optimizations involve subtracting 1 from the immediate and forming |
12406 | | // an ADDI. Make sure the new immediate is valid for an ADDI. |
12407 | 9.28k | APInt ImmValMinus1 = N0C->getAPIntValue() - 1; |
12408 | 9.28k | if (!ImmValMinus1.isSignedIntN(12)) |
12409 | 193 | return SDValue(); |
12410 | | |
12411 | 9.09k | SDValue NewLHS; |
12412 | 9.09k | if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { |
12413 | | // (sub constant, (setcc x, y, eq/neq)) -> |
12414 | | // (add (setcc x, y, neq/eq), constant - 1) |
12415 | 116 | ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); |
12416 | 116 | EVT SetCCOpVT = N1.getOperand(0).getValueType(); |
12417 | 116 | if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger()) |
12418 | 98 | return SDValue(); |
12419 | 18 | CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); |
12420 | 18 | NewLHS = |
12421 | 18 | DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal); |
12422 | 8.97k | } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) && |
12423 | 8.97k | N1.getOperand(0).getOpcode() == ISD::SETCC) { |
12424 | | // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). |
12425 | | // Since setcc returns a bool the xor is equivalent to 1-setcc. |
12426 | 216 | NewLHS = N1.getOperand(0); |
12427 | 216 | } else |
12428 | 8.76k | return SDValue(); |
12429 | | |
12430 | 234 | SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT); |
12431 | 234 | return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS); |
12432 | 9.09k | } |
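      | | // Editor's note: worked example for the setcc case above (illustrative only).
      | | //   (sub 5, (setcc x, y, eq))  ->  (add (setcc x, y, ne), 4)
      | | // Both sides are 4 when x == y and 5 otherwise, and since 4 is simm12 the
      | | // add can select to a single ADDI.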
12433 | | |
12434 | | static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, |
12435 | 32.5k | const RISCVSubtarget &Subtarget) { |
12436 | 32.5k | if (SDValue V = combineSubOfBoolean(N, DAG)) |
12437 | 234 | return V; |
12438 | | |
12439 | 32.3k | SDValue N0 = N->getOperand(0); |
12440 | 32.3k | SDValue N1 = N->getOperand(1); |
12441 | | // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) |
12442 | 32.3k | if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && |
12443 | 32.3k | isNullConstant(N1.getOperand(1))) { |
12444 | 10 | ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); |
12445 | 10 | if (CCVal == ISD::SETLT) { |
12446 | 10 | EVT VT = N->getValueType(0); |
12447 | 10 | SDLoc DL(N); |
12448 | 10 | unsigned ShAmt = N0.getValueSizeInBits() - 1; |
12449 | 10 | return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), |
12450 | 10 | DAG.getConstant(ShAmt, DL, VT)); |
12451 | 10 | } |
12452 | 10 | } |
12453 | | |
12454 | | // fold (sub x, (select lhs, rhs, cc, 0, y)) -> |
12455 | | // (select lhs, rhs, cc, x, (sub x, y)) |
12456 | 32.2k | return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget); |
12457 | 32.3k | } |
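      | | // Editor's note (illustrative, assuming RV64): the (sub 0, setlt) fold above
      | | // turns
      | | //   (sub 0, (setcc x, 0, setlt))  ->  (sra x, 63)
      | | // since both sides are -1 (all ones) for negative x and 0 otherwise, so an
      | | // SLT/negate pair becomes a single SRAI.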
12458 | | |
12459 | | // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. |
12460 | | // Legalizing setcc can introduce xors like this. Doing this transform reduces |
12461 | | // the number of xors and may allow the xor to fold into a branch condition. |
12462 | 58.5k | static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) { |
12463 | 58.5k | SDValue N0 = N->getOperand(0); |
12464 | 58.5k | SDValue N1 = N->getOperand(1); |
12465 | 58.5k | bool IsAnd = N->getOpcode() == ISD::AND; |
12466 | | |
12467 | 58.5k | if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR) |
12468 | 58.2k | return SDValue(); |
12469 | | |
12470 | 337 | if (!N0.hasOneUse() || !N1.hasOneUse()) |
12471 | 203 | return SDValue(); |
12472 | | |
12473 | 134 | SDValue N01 = N0.getOperand(1); |
12474 | 134 | SDValue N11 = N1.getOperand(1); |
12475 | | |
12476 | | // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into |
12477 | | // (xor X, -1) based on the upper bits of the other operand being 0. If the |
12478 | | // operation is And, allow one of the Xors to use -1. |
12479 | 134 | if (isOneConstant(N01)) { |
12480 | 67 | if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11))) |
12481 | 1 | return SDValue(); |
12482 | 67 | } else if (isOneConstant(N11)) { |
12483 | | // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. |
12484 | 17 | if (!(IsAnd && isAllOnesConstant(N01))) |
12485 | 2 | return SDValue(); |
12486 | 17 | } else |
12487 | 50 | return SDValue(); |
12488 | | |
12489 | 81 | EVT VT = N->getValueType(0); |
12490 | | |
12491 | 81 | SDValue N00 = N0.getOperand(0); |
12492 | 81 | SDValue N10 = N1.getOperand(0); |
12493 | | |
12494 | | // The LHS of the xors needs to be 0/1. |
12495 | 81 | APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); |
12496 | 81 | if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask)) |
12497 | 74 | return SDValue(); |
12498 | | |
12499 | | // Invert the opcode and insert a new xor. |
12500 | 7 | SDLoc DL(N); |
12501 | 7 | unsigned Opc = IsAnd ? ISD::OR : ISD::AND; |
12502 | 7 | SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10); |
12503 | 7 | return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT)); |
12504 | 81 | } |
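      | | // Editor's note (illustrative): with a and b known to be 0 or 1, the rewrite
      | | // is just De Morgan's law:
      | | //   (and (xor a, 1), (xor b, 1))  ->  (xor (or  a, b), 1)
      | | //   (or  (xor a, 1), (xor b, 1))  ->  (xor (and a, b), 1)
      | | // collapsing two xors into one that may then fold into a branch condition.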
12505 | | |
12506 | | static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, |
12507 | 0 | const RISCVSubtarget &Subtarget) { |
12508 | 0 | SDValue N0 = N->getOperand(0); |
12509 | 0 | EVT VT = N->getValueType(0); |
12510 | | |
12511 | | // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero |
12512 | | // extending X. This is safe since we only need the LSB after the shift and |
12513 | | // shift amounts larger than 31 would produce poison. If we wait until |
12514 | | // type legalization, we'll create RISCVISD::SRLW and we can't recover it |
12515 | | // to use a BEXT instruction. |
12516 | 0 | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && |
12517 | 0 | N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL && |
12518 | 0 | !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { |
12519 | 0 | SDLoc DL(N0); |
12520 | 0 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); |
12521 | 0 | SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); |
12522 | 0 | SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); |
12523 | 0 | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl); |
12524 | 0 | } |
12525 | | |
12526 | 0 | return SDValue(); |
12527 | 0 | } |
12528 | | |
12529 | | // Combines two comparison operations and a logic operation into one selection
12530 | | // operation (min, max) and a logic operation. Returns the newly constructed
12531 | | // node if the conditions for the optimization are satisfied.
12532 | | static SDValue performANDCombine(SDNode *N, |
12533 | | TargetLowering::DAGCombinerInfo &DCI, |
12534 | 83.6k | const RISCVSubtarget &Subtarget) { |
12535 | 83.6k | SelectionDAG &DAG = DCI.DAG; |
12536 | | |
12537 | 83.6k | SDValue N0 = N->getOperand(0); |
12538 | | // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero |
12539 | | // extending X. This is safe since we only need the LSB after the shift and |
12540 | | // shift amounts larger than 31 would produce poison. If we wait until |
12541 | | // type legalization, we'll create RISCVISD::SRLW and we can't recover it |
12542 | | // to use a BEXT instruction. |
12543 | 83.6k | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && |
12544 | 83.6k | N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) && |
12545 | 83.6k | N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) && |
12546 | 83.6k | N0.hasOneUse()) { |
12547 | 0 | SDLoc DL(N); |
12548 | 0 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); |
12549 | 0 | SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); |
12550 | 0 | SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); |
12551 | 0 | SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl, |
12552 | 0 | DAG.getConstant(1, DL, MVT::i64)); |
12553 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); |
12554 | 0 | } |
12555 | | |
12556 | 83.6k | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
12557 | 0 | return V; |
12558 | 83.6k | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
12559 | 0 | return V; |
12560 | | |
12561 | 83.6k | if (DCI.isAfterLegalizeDAG()) |
12562 | 43.4k | if (SDValue V = combineDeMorganOfBoolean(N, DAG)) |
12563 | 5 | return V; |
12564 | | |
12565 | | // fold (and (select lhs, rhs, cc, -1, y), x) -> |
12566 | | // (select lhs, rhs, cc, x, (and x, y)) |
12567 | 83.6k | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget); |
12568 | 83.6k | } |
12569 | | |
12570 | | // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez. |
12571 | | // FIXME: Generalize to other binary operators with same operand. |
12572 | | static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, |
12573 | 100k | SelectionDAG &DAG) { |
12574 | 100k | assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); |
12575 | | |
12576 | 100k | if (N0.getOpcode() != RISCVISD::CZERO_EQZ || |
12577 | 100k | N1.getOpcode() != RISCVISD::CZERO_NEZ || |
12578 | 100k | !N0.hasOneUse() || !N1.hasOneUse()) |
12579 | 100k | return SDValue(); |
12580 | | |
12581 | | // Should have the same condition. |
12582 | 0 | SDValue Cond = N0.getOperand(1); |
12583 | 0 | if (Cond != N1.getOperand(1)) |
12584 | 0 | return SDValue(); |
12585 | | |
12586 | 0 | SDValue TrueV = N0.getOperand(0); |
12587 | 0 | SDValue FalseV = N1.getOperand(0); |
12588 | |
12589 | 0 | if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR || |
12590 | 0 | TrueV.getOperand(1) != FalseV.getOperand(1) || |
12591 | 0 | !isOneConstant(TrueV.getOperand(1)) || |
12592 | 0 | !TrueV.hasOneUse() || !FalseV.hasOneUse()) |
12593 | 0 | return SDValue(); |
12594 | | |
12595 | 0 | EVT VT = N->getValueType(0); |
12596 | 0 | SDLoc DL(N); |
12597 | |
12598 | 0 | SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0), |
12599 | 0 | Cond); |
12600 | 0 | SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), |
12601 | 0 | Cond); |
12602 | 0 | SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1); |
12603 | 0 | return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1)); |
12604 | 0 | } |
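      | | // Editor's note (illustrative sketch): (or (czero_eqz a, c), (czero_nez b, c))
      | | // is the select idiom "c != 0 ? a : b". When both arms are (xor .., 1) with
      | | // the same constant, the xor is pulled through the select:
      | | //   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
      | | //     -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)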
12605 | | |
12606 | | static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, |
12607 | 50.3k | const RISCVSubtarget &Subtarget) { |
12608 | 50.3k | SelectionDAG &DAG = DCI.DAG; |
12609 | | |
12610 | 50.3k | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
12611 | 0 | return V; |
12612 | 50.3k | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
12613 | 0 | return V; |
12614 | | |
12615 | 50.3k | if (DCI.isAfterLegalizeDAG()) |
12616 | 15.1k | if (SDValue V = combineDeMorganOfBoolean(N, DAG)) |
12617 | 2 | return V; |
12618 | | |
12619 | | // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom. |
12620 | | // We may be able to pull a common operation out of the true and false value. |
12621 | 50.2k | SDValue N0 = N->getOperand(0); |
12622 | 50.2k | SDValue N1 = N->getOperand(1); |
12623 | 50.2k | if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG)) |
12624 | 0 | return V; |
12625 | 50.2k | if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG)) |
12626 | 0 | return V; |
12627 | | |
12628 | | // fold (or (select cond, 0, y), x) -> |
12629 | | // (select cond, x, (or x, y)) |
12630 | 50.2k | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
12631 | 50.2k | } |
12632 | | |
12633 | | static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, |
12634 | 82.9k | const RISCVSubtarget &Subtarget) { |
12635 | 82.9k | SDValue N0 = N->getOperand(0); |
12636 | 82.9k | SDValue N1 = N->getOperand(1); |
12637 | | |
12638 | | // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use |
12639 | | // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
12640 | | // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
12641 | 82.9k | if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && |
12642 | 82.9k | N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) && |
12643 | 82.9k | N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) && |
12644 | 82.9k | !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { |
12645 | 0 | SDLoc DL(N); |
12646 | 0 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); |
12647 | 0 | SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); |
12648 | 0 | SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1); |
12649 | 0 | SDValue And = DAG.getNOT(DL, Shl, MVT::i64); |
12650 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); |
12651 | 0 | } |
12652 | | |
12653 | | // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) |
12654 | | // NOTE: Assumes ROL being legal means ROLW is legal. |
12655 | 82.9k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
12656 | 82.9k | if (N0.getOpcode() == RISCVISD::SLLW && |
12657 | 82.9k | isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) && |
12658 | 82.9k | TLI.isOperationLegal(ISD::ROTL, MVT::i64)) { |
12659 | 0 | SDLoc DL(N); |
12660 | 0 | return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64, |
12661 | 0 | DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); |
12662 | 0 | } |
12663 | | |
12664 | | // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt) |
12665 | 82.9k | if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) { |
12666 | 29.3k | auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0)); |
12667 | 29.3k | ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); |
12668 | 29.3k | if (ConstN00 && CC == ISD::SETLT) { |
12669 | 0 | EVT VT = N0.getValueType(); |
12670 | 0 | SDLoc DL(N0); |
12671 | 0 | const APInt &Imm = ConstN00->getAPIntValue(); |
12672 | 0 | if ((Imm + 1).isSignedIntN(12)) |
12673 | 0 | return DAG.getSetCC(DL, VT, N0.getOperand(1), |
12674 | 0 | DAG.getConstant(Imm + 1, DL, VT), CC); |
12675 | 0 | } |
12676 | 29.3k | } |
12677 | | |
12678 | 82.9k | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
12679 | 0 | return V; |
12680 | 82.9k | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
12681 | 0 | return V; |
12682 | | |
12683 | | // fold (xor (select cond, 0, y), x) -> |
12684 | | // (select cond, x, (xor x, y)) |
12685 | 82.9k | return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); |
12686 | 82.9k | } |
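      | | // Editor's note: worked example for the setcc fold above (illustrative only).
      | | //   (xor (setcc 5, y, setlt), 1)  ->  (setcc y, 6, setlt)
      | | // since !(5 < y) is y <= 5, i.e. y < 6, and the incremented constant 6 still
      | | // fits in a signed 12-bit immediate.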
12687 | | |
12688 | 0 | static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) { |
12689 | 0 | EVT VT = N->getValueType(0); |
12690 | 0 | if (!VT.isVector()) |
12691 | 0 | return SDValue(); |
12692 | | |
12693 | 0 | SDLoc DL(N); |
12694 | 0 | SDValue N0 = N->getOperand(0); |
12695 | 0 | SDValue N1 = N->getOperand(1); |
12696 | 0 | SDValue MulOper; |
12697 | 0 | unsigned AddSubOpc; |
12698 | | |
12699 | | // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y) |
12700 | | // (mul x, add (y, 1)) -> (add x, (mul x, y)) |
12701 | | // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y)) |
12702 | | // (mul x, (sub 1, y)) -> (sub x, (mul x, y)) |
12703 | 0 | auto IsAddSubWith1 = [&](SDValue V) -> bool { |
12704 | 0 | AddSubOpc = V->getOpcode(); |
12705 | 0 | if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { |
12706 | 0 | SDValue Opnd = V->getOperand(1); |
12707 | 0 | MulOper = V->getOperand(0); |
12708 | 0 | if (AddSubOpc == ISD::SUB) |
12709 | 0 | std::swap(Opnd, MulOper); |
12710 | 0 | if (isOneOrOneSplat(Opnd)) |
12711 | 0 | return true; |
12712 | 0 | } |
12713 | 0 | return false; |
12714 | 0 | }; |
12715 | |
12716 | 0 | if (IsAddSubWith1(N0)) { |
12717 | 0 | SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper); |
12718 | 0 | return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal); |
12719 | 0 | } |
12720 | | |
12721 | 0 | if (IsAddSubWith1(N1)) { |
12722 | 0 | SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper); |
12723 | 0 | return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal); |
12724 | 0 | } |
12725 | | |
12726 | 0 | return SDValue(); |
12727 | 0 | } |
12728 | | |
12729 | | /// Indexed load/store instructions zero-extend their indices, so try to
12730 | | /// narrow the type of the index operand.
12731 | 0 | static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) { |
12732 | 0 | if (isIndexTypeSigned(IndexType)) |
12733 | 0 | return false; |
12734 | | |
12735 | 0 | if (!N->hasOneUse()) |
12736 | 0 | return false; |
12737 | | |
12738 | 0 | EVT VT = N.getValueType(); |
12739 | 0 | SDLoc DL(N); |
12740 | | |
12741 | | // In general, what we're doing here is seeing if we can sink a truncate to |
12742 | | // a smaller element type into the expression tree building our index. |
12743 | | // TODO: We can generalize this and handle a bunch more cases if useful. |
12744 | | |
12745 | | // Narrow a buildvector to the narrowest element type. This requires less |
12746 | | // work and less register pressure at high LMUL, and creates smaller constants |
12747 | | // which may be cheaper to materialize. |
12748 | 0 | if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) { |
12749 | 0 | KnownBits Known = DAG.computeKnownBits(N); |
12750 | 0 | unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits()); |
12751 | 0 | LLVMContext &C = *DAG.getContext(); |
12752 | 0 | EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C); |
12753 | 0 | if (ResultVT.bitsLT(VT.getVectorElementType())) { |
12754 | 0 | N = DAG.getNode(ISD::TRUNCATE, DL, |
12755 | 0 | VT.changeVectorElementType(ResultVT), N); |
12756 | 0 | return true; |
12757 | 0 | } |
12758 | 0 | } |
12759 | | |
12760 | | // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty). |
12761 | 0 | if (N.getOpcode() != ISD::SHL) |
12762 | 0 | return false; |
12763 | | |
12764 | 0 | SDValue N0 = N.getOperand(0); |
12765 | 0 | if (N0.getOpcode() != ISD::ZERO_EXTEND && |
12766 | 0 | N0.getOpcode() != RISCVISD::VZEXT_VL) |
12767 | 0 | return false;
12768 | 0 | if (!N0->hasOneUse()) |
12769 | 0 | return false;
12770 | |
12771 | 0 | APInt ShAmt; |
12772 | 0 | SDValue N1 = N.getOperand(1); |
12773 | 0 | if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) |
12774 | 0 | return false;
12775 | |
12776 | 0 | SDValue Src = N0.getOperand(0); |
12777 | 0 | EVT SrcVT = Src.getValueType(); |
12778 | 0 | unsigned SrcElen = SrcVT.getScalarSizeInBits(); |
12779 | 0 | unsigned ShAmtV = ShAmt.getZExtValue(); |
12780 | 0 | unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV); |
12781 | 0 | NewElen = std::max(NewElen, 8U); |
12782 | | |
12783 | | // Skip if NewElen is not narrower than the original extended type. |
12784 | 0 | if (NewElen >= N0.getValueType().getScalarSizeInBits()) |
12785 | 0 | return false; |
12786 | | |
12787 | 0 | EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); |
12788 | 0 | EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); |
12789 | |
12790 | 0 | SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); |
12791 | 0 | SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); |
12792 | 0 | N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); |
12793 | 0 | return true; |
12794 | 0 | } |
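      | | // Editor's note (illustrative, hypothetical index expression): for an
      | | // unsigned-index gather whose index is (shl (zext <N x i8> x to <N x i64>), 2),
      | | // SrcElen = 8 and ShAmt = 2, so NewElen = 16 and the index is rebuilt as
      | | //   (shl (zext <N x i8> x to <N x i16>), 2)
      | | // which cannot overflow 16 bits and keeps the index vector at a smaller
      | | // element width (and hence lower register pressure / LMUL).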
12795 | | |
12796 | | // Replace (seteq (i64 (and X, 0xffffffff)), C1) with |
12797 | | // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from |
12798 | | // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg |
12799 | | // can become a sext.w instead of a shift pair. |
12800 | | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, |
12801 | 183k | const RISCVSubtarget &Subtarget) { |
12802 | 183k | SDValue N0 = N->getOperand(0); |
12803 | 183k | SDValue N1 = N->getOperand(1); |
12804 | 183k | EVT VT = N->getValueType(0); |
12805 | 183k | EVT OpVT = N0.getValueType(); |
12806 | | |
12807 | 183k | if (OpVT != MVT::i64 || !Subtarget.is64Bit()) |
12808 | 56.3k | return SDValue(); |
12809 | | |
12810 | | // RHS needs to be a constant. |
12811 | 127k | auto *N1C = dyn_cast<ConstantSDNode>(N1); |
12812 | 127k | if (!N1C) |
12813 | 45.7k | return SDValue(); |
12814 | | |
12815 | | // LHS needs to be (and X, 0xffffffff). |
12816 | 81.6k | if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || |
12817 | 81.6k | !isa<ConstantSDNode>(N0.getOperand(1)) || |
12818 | 81.6k | N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) |
12819 | 81.6k | return SDValue(); |
12820 | | |
12821 | | // Looking for an equality compare. |
12822 | 0 | ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); |
12823 | 0 | if (!isIntEqualitySetCC(Cond)) |
12824 | 0 | return SDValue(); |
12825 | | |
12826 | | // Don't do this if the sign bit is provably zero, it will be turned back into |
12827 | | // an AND. |
12828 | 0 | APInt SignMask = APInt::getOneBitSet(64, 31); |
12829 | 0 | if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask)) |
12830 | 0 | return SDValue(); |
12831 | | |
12832 | 0 | const APInt &C1 = N1C->getAPIntValue(); |
12833 | |
12834 | 0 | SDLoc dl(N); |
12835 | | // If the constant is larger than 2^32 - 1 it is impossible for both sides |
12836 | | // to be equal. |
12837 | 0 | if (C1.getActiveBits() > 32) |
12838 | 0 | return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); |
12839 | | |
12840 | 0 | SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT, |
12841 | 0 | N0.getOperand(0), DAG.getValueType(MVT::i32)); |
12842 | 0 | return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64), |
12843 | 0 | dl, OpVT), Cond); |
12844 | 0 | } |
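      | | // Editor's note: worked example (illustrative, assuming RV64).
      | | //   (seteq (and X, 0xffffffff), 0x80000000)
      | | //     -> (seteq (sext_inreg X, i32), 0xffffffff80000000)
      | | // The sign-extended constant is typically cheaper to materialize, and the
      | | // sext_inreg selects to a single sext.w instead of the shift pair needed for
      | | // the 0xffffffff mask.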
12845 | | |
12846 | | static SDValue |
12847 | | performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, |
12848 | 0 | const RISCVSubtarget &Subtarget) { |
12849 | 0 | SDValue Src = N->getOperand(0); |
12850 | 0 | EVT VT = N->getValueType(0); |
12851 | | |
12852 | | // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) |
12853 | 0 | if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && |
12854 | 0 | cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16)) |
12855 | 0 | return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, |
12856 | 0 | Src.getOperand(0)); |
12857 | | |
12858 | 0 | return SDValue(); |
12859 | 0 | } |
12860 | | |
12861 | | namespace { |
12862 | | // Forward declaration of the structure holding the necessary information to |
12863 | | // apply a combine. |
12864 | | struct CombineResult; |
12865 | | |
12866 | | /// Helper class for folding sign/zero extensions. |
12867 | | /// In particular, this class is used for the following combines: |
12868 | | /// add_vl -> vwadd(u) | vwadd(u)_w |
12869 | | /// sub_vl -> vwsub(u) | vwsub(u)_w |
12870 | | /// mul_vl -> vwmul(u) | vwmul_su |
12871 | | /// |
12872 | | /// An object of this class represents an operand of the operation we want to |
12873 | | /// combine. |
12874 | | /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of |
12875 | | /// NodeExtensionHelper for `a` and one for `b`. |
12876 | | /// |
12877 | | /// This class abstracts away how the extension is materialized and |
12878 | | /// how its Mask, VL, number of users affect the combines. |
12879 | | /// |
12880 | | /// In particular: |
12881 | | /// - VWADD_W is conceptually == add(op0, sext(op1)) |
12882 | | /// - VWADDU_W == add(op0, zext(op1)) |
12883 | | /// - VWSUB_W == sub(op0, sext(op1)) |
12884 | | /// - VWSUBU_W == sub(op0, zext(op1)) |
12885 | | /// |
12886 | | /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to |
12887 | | /// zext|sext(smaller_value). |
12888 | | struct NodeExtensionHelper { |
12889 | | /// Records if this operand is like being zero extended. |
12890 | | bool SupportsZExt; |
12891 | | /// Records if this operand is like being sign extended. |
12892 | | /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For |
12893 | | /// instance, a splat constant (e.g., 3) would support being both sign and
12894 | | /// zero extended. |
12895 | | bool SupportsSExt; |
12896 | | /// This boolean captures whether we care if this operand would still be |
12897 | | /// around after the folding happens. |
12898 | | bool EnforceOneUse; |
12899 | | /// Records if this operand's mask needs to match the mask of the operation |
12900 | | /// that it will fold into. |
12901 | | bool CheckMask; |
12902 | | /// Value of the Mask for this operand. |
12903 | | /// It may be SDValue(). |
12904 | | SDValue Mask; |
12905 | | /// Value of the vector length operand. |
12906 | | /// It may be SDValue(). |
12907 | | SDValue VL; |
12908 | | /// Original value that this NodeExtensionHelper represents. |
12909 | | SDValue OrigOperand; |
12910 | | |
12911 | | /// Get the value feeding the extension or the value itself. |
12912 | | /// E.g., for zext(a), this would return a. |
12913 | 0 | SDValue getSource() const { |
12914 | 0 | switch (OrigOperand.getOpcode()) { |
12915 | 0 | case RISCVISD::VSEXT_VL: |
12916 | 0 | case RISCVISD::VZEXT_VL: |
12917 | 0 | return OrigOperand.getOperand(0); |
12918 | 0 | default: |
12919 | 0 | return OrigOperand; |
12920 | 0 | } |
12921 | 0 | } |
12922 | | |
12923 | | /// Check if this instance represents a splat. |
12924 | 0 | bool isSplat() const { |
12925 | 0 | return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL; |
12926 | 0 | } |
12927 | | |
12928 | | /// Get or create a value that can feed \p Root with the given extension \p |
12929 | | /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. |
12930 | | /// \see ::getSource(). |
12931 | | SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, |
12932 | 0 | std::optional<bool> SExt) const { |
12933 | 0 | if (!SExt.has_value()) |
12934 | 0 | return OrigOperand; |
12935 | | |
12936 | 0 | MVT NarrowVT = getNarrowType(Root); |
12937 | |
12938 | 0 | SDValue Source = getSource(); |
12939 | 0 | if (Source.getValueType() == NarrowVT) |
12940 | 0 | return Source; |
12941 | | |
12942 | 0 | unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL; |
12943 | | |
12944 | | // If we need an extension, we should be changing the type. |
12945 | 0 | SDLoc DL(Root); |
12946 | 0 | auto [Mask, VL] = getMaskAndVL(Root); |
12947 | 0 | switch (OrigOperand.getOpcode()) { |
12948 | 0 | case RISCVISD::VSEXT_VL: |
12949 | 0 | case RISCVISD::VZEXT_VL: |
12950 | 0 | return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); |
12951 | 0 | case RISCVISD::VMV_V_X_VL: |
12952 | 0 | return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, |
12953 | 0 | DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL); |
12954 | 0 | default: |
12955 | | // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL |
12956 | | // and that operand should already have the right NarrowVT so no |
12957 | | // extension should be required at this point. |
12958 | 0 | llvm_unreachable("Unsupported opcode"); |
12959 | 0 | } |
12960 | 0 | } |
12961 | | |
12962 | | /// Helper function to get the narrow type for \p Root. |
12963 | | /// The narrow type is the type of \p Root where we divided the size of each |
12964 | | /// element by 2. E.g., if Root's type is <2xi16>, the narrow type is <2xi8>. |
12965 | | /// \pre The size of the type of the elements of Root must be a multiple of 2 |
12966 | | /// and be at least 16. |
12967 | 0 | static MVT getNarrowType(const SDNode *Root) { |
12968 | 0 | MVT VT = Root->getSimpleValueType(0); |
12969 | | |
12970 | | // Determine the narrow size. |
12971 | 0 | unsigned NarrowSize = VT.getScalarSizeInBits() / 2; |
12972 | 0 | assert(NarrowSize >= 8 && "Trying to extend something we can't represent"); |
12973 | 0 | MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize), |
12974 | 0 | VT.getVectorElementCount()); |
12975 | 0 | return NarrowVT; |
12976 | 0 | } |
12977 | | |
12978 | | /// Return the opcode required to materialize the folding of the sign |
12979 | | /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for |
12980 | | /// both operands for \p Opcode. |
12981 | | /// Put differently, get the opcode to materialize: |
12982 | | /// - IsSExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b) |
12983 | | /// - IsSExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b) |
12984 | | /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). |
12985 | 0 | static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { |
12986 | 0 | switch (Opcode) { |
12987 | 0 | case RISCVISD::ADD_VL: |
12988 | 0 | case RISCVISD::VWADD_W_VL: |
12989 | 0 | case RISCVISD::VWADDU_W_VL: |
12990 | 0 | return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; |
12991 | 0 | case RISCVISD::MUL_VL: |
12992 | 0 | return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; |
12993 | 0 | case RISCVISD::SUB_VL: |
12994 | 0 | case RISCVISD::VWSUB_W_VL: |
12995 | 0 | case RISCVISD::VWSUBU_W_VL: |
12996 | 0 | return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL; |
12997 | 0 | default: |
12998 | 0 | llvm_unreachable("Unexpected opcode"); |
12999 | 0 | } |
13000 | 0 | } |
13001 | | |
13002 | | /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> |
13003 | | /// newOpcode(a, b). |
13004 | 0 | static unsigned getSUOpcode(unsigned Opcode) { |
13005 | 0 | assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); |
13006 | 0 | return RISCVISD::VWMULSU_VL; |
13007 | 0 | } |
13008 | | |
13009 | | /// Get the opcode to materialize \p Opcode(a, s|zext(b)) -> |
13010 | | /// newOpcode(a, b). |
13011 | 0 | static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { |
13012 | 0 | switch (Opcode) { |
13013 | 0 | case RISCVISD::ADD_VL: |
13014 | 0 | return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; |
13015 | 0 | case RISCVISD::SUB_VL: |
13016 | 0 | return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; |
13017 | 0 | default: |
13018 | 0 | llvm_unreachable("Unexpected opcode"); |
13019 | 0 | } |
13020 | 0 | } |
13021 | | |
13022 | | using CombineToTry = std::function<std::optional<CombineResult>( |
13023 | | SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, |
13024 | | const NodeExtensionHelper & /*RHS*/)>; |
13025 | | |
13026 | | /// Check if this node needs to be fully folded or extended for all users. |
13027 | 0 | bool needToPromoteOtherUsers() const { return EnforceOneUse; } |
13028 | | |
13029 | | /// Helper method to set the various fields of this struct based on the |
13030 | | /// type of \p Root. |
13031 | 0 | void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { |
13032 | 0 | SupportsZExt = false; |
13033 | 0 | SupportsSExt = false; |
13034 | 0 | EnforceOneUse = true; |
13035 | 0 | CheckMask = true; |
13036 | 0 | switch (OrigOperand.getOpcode()) { |
13037 | 0 | case RISCVISD::VZEXT_VL: |
13038 | 0 | SupportsZExt = true; |
13039 | 0 | Mask = OrigOperand.getOperand(1); |
13040 | 0 | VL = OrigOperand.getOperand(2); |
13041 | 0 | break; |
13042 | 0 | case RISCVISD::VSEXT_VL: |
13043 | 0 | SupportsSExt = true; |
13044 | 0 | Mask = OrigOperand.getOperand(1); |
13045 | 0 | VL = OrigOperand.getOperand(2); |
13046 | 0 | break; |
13047 | 0 | case RISCVISD::VMV_V_X_VL: { |
13048 | | // Historically, we didn't care about splat values not disappearing during |
13049 | | // combines. |
13050 | 0 | EnforceOneUse = false; |
13051 | 0 | CheckMask = false; |
13052 | 0 | VL = OrigOperand.getOperand(2); |
13053 | | |
13054 | | // The operand is a splat of a scalar. |
13055 | | |
13056 | | // The passthru must be undef for tail agnostic. |
13057 | 0 | if (!OrigOperand.getOperand(0).isUndef()) |
13058 | 0 | break; |
13059 | | |
13060 | | // Get the scalar value. |
13061 | 0 | SDValue Op = OrigOperand.getOperand(1); |
13062 | | |
13063 | | // See if we have enough sign bits or zero bits in the scalar to use a |
13064 | | // widening opcode by splatting to smaller element size. |
13065 | 0 | MVT VT = Root->getSimpleValueType(0); |
13066 | 0 | unsigned EltBits = VT.getScalarSizeInBits(); |
13067 | 0 | unsigned ScalarBits = Op.getValueSizeInBits(); |
13068 | | // Make sure we're getting all element bits from the scalar register. |
13069 | | // FIXME: Support implicit sign extension of vmv.v.x? |
13070 | 0 | if (ScalarBits < EltBits) |
13071 | 0 | break; |
13072 | | |
13073 | 0 | unsigned NarrowSize = VT.getScalarSizeInBits() / 2; |
13074 | | // If the narrow type cannot be expressed with a legal VMV, |
13075 | | // this is not a valid candidate. |
13076 | 0 | if (NarrowSize < 8) |
13077 | 0 | break; |
13078 | | |
13079 | 0 | if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) |
13080 | 0 | SupportsSExt = true; |
13081 | 0 | if (DAG.MaskedValueIsZero(Op, |
13082 | 0 | APInt::getBitsSetFrom(ScalarBits, NarrowSize))) |
13083 | 0 | SupportsZExt = true; |
13084 | 0 | break; |
13085 | 0 | } |
13086 | 0 | default: |
13087 | 0 | break; |
13088 | 0 | } |
13089 | 0 | } |
13090 | | |
13091 | | /// Check if \p Root supports any extension folding combines. |
13092 | 0 | static bool isSupportedRoot(const SDNode *Root) { |
13093 | 0 | switch (Root->getOpcode()) { |
13094 | 0 | case RISCVISD::ADD_VL: |
13095 | 0 | case RISCVISD::MUL_VL: |
13096 | 0 | case RISCVISD::VWADD_W_VL: |
13097 | 0 | case RISCVISD::VWADDU_W_VL: |
13098 | 0 | case RISCVISD::SUB_VL: |
13099 | 0 | case RISCVISD::VWSUB_W_VL: |
13100 | 0 | case RISCVISD::VWSUBU_W_VL: |
13101 | 0 | return true; |
13102 | 0 | default: |
13103 | 0 | return false; |
13104 | 0 | } |
13105 | 0 | } |
13106 | | |
13107 | | /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). |
13108 | 0 | NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { |
13109 | 0 | assert(isSupportedRoot(Root) && "Trying to build a helper with an " |
13110 | 0 | "unsupported root"); |
13111 | 0 | assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); |
13112 | 0 | OrigOperand = Root->getOperand(OperandIdx); |
13113 | |
13114 | 0 | unsigned Opc = Root->getOpcode(); |
13115 | 0 | switch (Opc) { |
13116 | | // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were |
13117 | | // <ADD|SUB>(LHS, S|ZEXT(RHS)) |
13118 | 0 | case RISCVISD::VWADD_W_VL: |
13119 | 0 | case RISCVISD::VWADDU_W_VL: |
13120 | 0 | case RISCVISD::VWSUB_W_VL: |
13121 | 0 | case RISCVISD::VWSUBU_W_VL: |
13122 | 0 | if (OperandIdx == 1) { |
13123 | 0 | SupportsZExt = |
13124 | 0 | Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; |
13125 | 0 | SupportsSExt = !SupportsZExt; |
13126 | 0 | std::tie(Mask, VL) = getMaskAndVL(Root); |
13127 | 0 | CheckMask = true; |
13128 | | // There's no existing extension here, so we don't have to worry about |
13129 | | // making sure it gets removed. |
13130 | 0 | EnforceOneUse = false; |
13131 | 0 | break; |
13132 | 0 | } |
13133 | 0 | [[fallthrough]]; |
13134 | 0 | default: |
13135 | 0 | fillUpExtensionSupport(Root, DAG); |
13136 | 0 | break; |
13137 | 0 | } |
13138 | 0 | } |
13139 | | |
13140 | | /// Check if this operand is compatible with the given vector length \p VL. |
13141 | 0 | bool isVLCompatible(SDValue VL) const { |
13142 | 0 | return this->VL != SDValue() && this->VL == VL; |
13143 | 0 | } |
13144 | | |
13145 | | /// Check if this operand is compatible with the given \p Mask. |
13146 | 0 | bool isMaskCompatible(SDValue Mask) const { |
13147 | 0 | return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask); |
13148 | 0 | } |
13149 | | |
13150 | | /// Helper function to get the Mask and VL from \p Root. |
13151 | 0 | static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { |
13152 | 0 | assert(isSupportedRoot(Root) && "Unexpected root"); |
13153 | 0 | return std::make_pair(Root->getOperand(3), Root->getOperand(4)); |
13154 | 0 | } |
13155 | | |
13156 | | /// Check if the Mask and VL of this operand are compatible with \p Root. |
13157 | 0 | bool areVLAndMaskCompatible(const SDNode *Root) const { |
13158 | 0 | auto [Mask, VL] = getMaskAndVL(Root); |
13159 | 0 | return isMaskCompatible(Mask) && isVLCompatible(VL); |
13160 | 0 | } |
13161 | | |
13162 | | /// Helper function to check if \p N is commutative with respect to the |
13163 | | /// foldings that are supported by this class. |
13164 | 0 | static bool isCommutative(const SDNode *N) { |
13165 | 0 | switch (N->getOpcode()) { |
13166 | 0 | case RISCVISD::ADD_VL: |
13167 | 0 | case RISCVISD::MUL_VL: |
13168 | 0 | case RISCVISD::VWADD_W_VL: |
13169 | 0 | case RISCVISD::VWADDU_W_VL: |
13170 | 0 | return true; |
13171 | 0 | case RISCVISD::SUB_VL: |
13172 | 0 | case RISCVISD::VWSUB_W_VL: |
13173 | 0 | case RISCVISD::VWSUBU_W_VL: |
13174 | 0 | return false; |
13175 | 0 | default: |
13176 | 0 | llvm_unreachable("Unexpected opcode"); |
13177 | 0 | } |
13178 | 0 | } |
13179 | | |
13180 | | /// Get a list of combine to try for folding extensions in \p Root. |
13181 | | /// Note that each returned CombineToTry function doesn't actually modify |
13182 | | /// anything. Instead they produce an optional CombineResult that, if not |
13183 | | /// std::nullopt, needs to be materialized for the combine to be applied. |
13184 | | /// \see CombineResult::materialize. |
13185 | | /// If the related CombineToTry function returns std::nullopt, that means the |
13186 | | /// combine didn't match. |
13187 | | static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root); |
13188 | | }; |
13189 | | |
13190 | | /// Helper structure that holds all the necessary information to materialize a |
13191 | | /// combine that does some extension folding. |
13192 | | struct CombineResult { |
13193 | | /// Opcode to be generated when materializing the combine. |
13194 | | unsigned TargetOpcode; |
13195 | | // No value means no extension is needed. If extension is needed, the value |
13196 | | // indicates if it needs to be sign extended. |
13197 | | std::optional<bool> SExtLHS; |
13198 | | std::optional<bool> SExtRHS; |
13199 | | /// Root of the combine. |
13200 | | SDNode *Root; |
13201 | | /// LHS of the TargetOpcode. |
13202 | | NodeExtensionHelper LHS; |
13203 | | /// RHS of the TargetOpcode. |
13204 | | NodeExtensionHelper RHS; |
13205 | | |
13206 | | CombineResult(unsigned TargetOpcode, SDNode *Root, |
13207 | | const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS, |
13208 | | const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS) |
13209 | | : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS), |
13210 | 0 | Root(Root), LHS(LHS), RHS(RHS) {} |
13211 | | |
13212 | | /// Return a value that uses TargetOpcode and that can be used to replace |
13213 | | /// Root. |
13214 | | /// The actual replacement is *not* done in that method. |
13215 | 0 | SDValue materialize(SelectionDAG &DAG) const { |
13216 | 0 | SDValue Mask, VL, Merge; |
13217 | 0 | std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root); |
13218 | 0 | Merge = Root->getOperand(2); |
13219 | 0 | return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), |
13220 | 0 | LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS), |
13221 | 0 | RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge, |
13222 | 0 | Mask, VL); |
13223 | 0 | } |
13224 | | }; |
13225 | | |
13226 | | /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) |
13227 | | /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both |
13228 | | /// are zext) and LHS and RHS can be folded into Root. |
13229 | | /// AllowSExt and AllowZExt define which form `ext` can take in this pattern. |
13230 | | /// |
13231 | | /// \note If the pattern can match with both zext and sext, the returned |
13232 | | /// CombineResult will feature the zext result. |
13233 | | /// |
13234 | | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
13235 | | /// can be used to apply the pattern. |
13236 | | static std::optional<CombineResult> |
13237 | | canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, |
13238 | | const NodeExtensionHelper &RHS, bool AllowSExt, |
13239 | 0 | bool AllowZExt) { |
13240 | 0 | assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); |
13241 | 0 | if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) |
13242 | 0 | return std::nullopt; |
13243 | 0 | if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) |
13244 | 0 | return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( |
13245 | 0 | Root->getOpcode(), /*IsSExt=*/false), |
13246 | 0 | Root, LHS, /*SExtLHS=*/false, RHS, |
13247 | 0 | /*SExtRHS=*/false); |
13248 | 0 | if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) |
13249 | 0 | return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( |
13250 | 0 | Root->getOpcode(), /*IsSExt=*/true), |
13251 | 0 | Root, LHS, /*SExtLHS=*/true, RHS, |
13252 | 0 | /*SExtRHS=*/true); |
13253 | 0 | return std::nullopt; |
13254 | 0 | } |
13255 | | |
13256 | | /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) |
13257 | | /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both |
13258 | | /// are zext) and LHS and RHS can be folded into Root. |
13259 | | /// |
13260 | | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
13261 | | /// can be used to apply the pattern. |
13262 | | static std::optional<CombineResult> |
13263 | | canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, |
13264 | 0 | const NodeExtensionHelper &RHS) { |
13265 | 0 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, |
13266 | 0 | /*AllowZExt=*/true); |
13267 | 0 | } |
13268 | | |
13269 | | /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) |
13270 | | /// |
13271 | | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
13272 | | /// can be used to apply the pattern. |
13273 | | static std::optional<CombineResult> |
13274 | | canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, |
13275 | 0 | const NodeExtensionHelper &RHS) { |
13276 | 0 | if (!RHS.areVLAndMaskCompatible(Root)) |
13277 | 0 | return std::nullopt; |
13278 | | |
13279 | | // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar |
13280 | | // sext/zext? |
13281 | | // Control this behavior behind an option (AllowSplatInVW_W) for testing |
13282 | | // purposes. |
13283 | 0 | if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) |
13284 | 0 | return CombineResult( |
13285 | 0 | NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false), |
13286 | 0 | Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false); |
13287 | 0 | if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) |
13288 | 0 | return CombineResult( |
13289 | 0 | NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true), |
13290 | 0 | Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true); |
13291 | 0 | return std::nullopt; |
13292 | 0 | } |
13293 | | |
13294 | | /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) |
13295 | | /// |
13296 | | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
13297 | | /// can be used to apply the pattern. |
13298 | | static std::optional<CombineResult> |
13299 | | canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, |
13300 | 0 | const NodeExtensionHelper &RHS) { |
13301 | 0 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, |
13302 | 0 | /*AllowZExt=*/false); |
13303 | 0 | } |
13304 | | |
13305 | | /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) |
13306 | | /// |
13307 | | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
13308 | | /// can be used to apply the pattern. |
13309 | | static std::optional<CombineResult> |
13310 | | canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, |
13311 | 0 | const NodeExtensionHelper &RHS) { |
13312 | 0 | return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, |
13313 | 0 | /*AllowZExt=*/true); |
13314 | 0 | } |
13315 | | |
13316 | | /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) |
13317 | | /// |
13318 | | /// \returns std::nullopt if the pattern doesn't match or a CombineResult that |
13319 | | /// can be used to apply the pattern. |
13320 | | static std::optional<CombineResult> |
13321 | | canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, |
13322 | 0 | const NodeExtensionHelper &RHS) { |
13323 | 0 | if (!LHS.SupportsSExt || !RHS.SupportsZExt) |
13324 | 0 | return std::nullopt; |
13325 | 0 | if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) |
13326 | 0 | return std::nullopt; |
13327 | 0 | return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), |
13328 | 0 | Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); |
13329 | 0 | } |
13330 | | |
13331 | | SmallVector<NodeExtensionHelper::CombineToTry> |
13332 | 0 | NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { |
13333 | 0 | SmallVector<CombineToTry> Strategies; |
13334 | 0 | switch (Root->getOpcode()) { |
13335 | 0 | case RISCVISD::ADD_VL: |
13336 | 0 | case RISCVISD::SUB_VL: |
13337 | | // add|sub -> vwadd(u)|vwsub(u) |
13338 | 0 | Strategies.push_back(canFoldToVWWithSameExtension); |
13339 | | // add|sub -> vwadd(u)_w|vwsub(u)_w |
13340 | 0 | Strategies.push_back(canFoldToVW_W); |
13341 | 0 | break; |
13342 | 0 | case RISCVISD::MUL_VL: |
13343 | | // mul -> vwmul(u) |
13344 | 0 | Strategies.push_back(canFoldToVWWithSameExtension); |
13345 | | // mul -> vwmulsu |
13346 | 0 | Strategies.push_back(canFoldToVW_SU); |
13347 | 0 | break; |
13348 | 0 | case RISCVISD::VWADD_W_VL: |
13349 | 0 | case RISCVISD::VWSUB_W_VL: |
13350 | | // vwadd_w|vwsub_w -> vwadd|vwsub |
13351 | 0 | Strategies.push_back(canFoldToVWWithSEXT); |
13352 | 0 | break; |
13353 | 0 | case RISCVISD::VWADDU_W_VL: |
13354 | 0 | case RISCVISD::VWSUBU_W_VL: |
13355 | | // vwaddu_w|vwsubu_w -> vwaddu|vwsubu |
13356 | 0 | Strategies.push_back(canFoldToVWWithZEXT); |
13357 | 0 | break; |
13358 | 0 | default: |
13359 | 0 | llvm_unreachable("Unexpected opcode"); |
13360 | 0 | } |
13361 | 0 | return Strategies; |
13362 | 0 | } |
13363 | | } // End anonymous namespace. |
13364 | | |
13365 | | /// Combine a binary operation to its equivalent VW or VW_W form. |
13366 | | /// The supported combines are: |
13367 | | /// add_vl -> vwadd(u) | vwadd(u)_w |
13368 | | /// sub_vl -> vwsub(u) | vwsub(u)_w |
13369 | | /// mul_vl -> vwmul(u) | vwmul_su |
13370 | | /// vwadd_w(u) -> vwadd(u) |
13371 | | /// vwsub_w(u) -> vwsub(u) |
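      |      | ///
      |      | /// For example (a sketch; the extends must use the same mask and VL as the
      |      | /// root node for the fold to be legal):
      |      | ///   mul_vl (vsext_vl a), (vzext_vl b)  ->  vwmulsu_vl a, b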
13372 | | static SDValue |
13373 | 0 | combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { |
13374 | 0 | SelectionDAG &DAG = DCI.DAG; |
13375 | |
13376 | 0 | assert(NodeExtensionHelper::isSupportedRoot(N) && |
13377 | 0 | "Shouldn't have called this method"); |
13378 | 0 | SmallVector<SDNode *> Worklist; |
13379 | 0 | SmallSet<SDNode *, 8> Inserted; |
13380 | 0 | Worklist.push_back(N); |
13381 | 0 | Inserted.insert(N); |
13382 | 0 | SmallVector<CombineResult> CombinesToApply; |
13383 | |
13384 | 0 | while (!Worklist.empty()) { |
13385 | 0 | SDNode *Root = Worklist.pop_back_val(); |
13386 | 0 | if (!NodeExtensionHelper::isSupportedRoot(Root)) |
13387 | 0 | return SDValue(); |
13388 | | |
13389 | 0 | NodeExtensionHelper LHS(N, 0, DAG); |
13390 | 0 | NodeExtensionHelper RHS(N, 1, DAG); |
13391 | 0 | auto AppendUsersIfNeeded = [&Worklist, |
13392 | 0 | &Inserted](const NodeExtensionHelper &Op) { |
13393 | 0 | if (Op.needToPromoteOtherUsers()) { |
13394 | 0 | for (SDNode *TheUse : Op.OrigOperand->uses()) { |
13395 | 0 | if (Inserted.insert(TheUse).second) |
13396 | 0 | Worklist.push_back(TheUse); |
13397 | 0 | } |
13398 | 0 | } |
13399 | 0 | }; |
13400 | | |
13401 | | // Control the compile time by limiting the number of nodes we look at in |
13402 | | // total. |
13403 | 0 | if (Inserted.size() > ExtensionMaxWebSize) |
13404 | 0 | return SDValue(); |
13405 | | |
13406 | 0 | SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies = |
13407 | 0 | NodeExtensionHelper::getSupportedFoldings(N); |
13408 | |
13409 | 0 | assert(!FoldingStrategies.empty() && "Nothing to be folded"); |
13410 | 0 | bool Matched = false; |
13411 | 0 | for (int Attempt = 0; |
13412 | 0 | (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched; |
13413 | 0 | ++Attempt) { |
13414 | |
13415 | 0 | for (NodeExtensionHelper::CombineToTry FoldingStrategy : |
13416 | 0 | FoldingStrategies) { |
13417 | 0 | std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); |
13418 | 0 | if (Res) { |
13419 | 0 | Matched = true; |
13420 | 0 | CombinesToApply.push_back(*Res); |
13421 | | // All the inputs that are extended need to be folded, otherwise |
13422 | | // we would be leaving the old input (since it may still be used), |
13423 | | // and the new one. |
13424 | 0 | if (Res->SExtLHS.has_value()) |
13425 | 0 | AppendUsersIfNeeded(LHS); |
13426 | 0 | if (Res->SExtRHS.has_value()) |
13427 | 0 | AppendUsersIfNeeded(RHS); |
13428 | 0 | break; |
13429 | 0 | } |
13430 | 0 | } |
13431 | 0 | std::swap(LHS, RHS); |
13432 | 0 | } |
13433 | | // Right now we take an all-or-nothing approach. |
13434 | 0 | if (!Matched) |
13435 | 0 | return SDValue(); |
13436 | 0 | } |
13437 | | // Store the value for the replacement of the input node separately. |
13438 | 0 | SDValue InputRootReplacement; |
13439 | | // We do the RAUW after we materialize all the combines, because some replaced |
13440 | | // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently, |
13441 | | // some of these nodes may appear in the NodeExtensionHelpers of some of the |
13442 | | // yet-to-be-visited CombinesToApply roots. |
13443 | 0 | SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; |
13444 | 0 | ValuesToReplace.reserve(CombinesToApply.size()); |
13445 | 0 | for (CombineResult Res : CombinesToApply) { |
13446 | 0 | SDValue NewValue = Res.materialize(DAG); |
13447 | 0 | if (!InputRootReplacement) { |
13448 | 0 | assert(Res.Root == N && |
13449 | 0 | "First element is expected to be the current node"); |
13450 | 0 | InputRootReplacement = NewValue; |
13451 | 0 | } else { |
13452 | 0 | ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue); |
13453 | 0 | } |
13454 | 0 | } |
13455 | 0 | for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) { |
13456 | 0 | DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second); |
13457 | 0 | DCI.AddToWorklist(OldNewValues.second.getNode()); |
13458 | 0 | } |
13459 | 0 | return InputRootReplacement; |
13460 | 0 | } |
13461 | | |
13462 | | // Helper function for performMemPairCombine. |
13463 | | // Try to combine the memory loads/stores LSNode1 and LSNode2 |
13464 | | // into a single memory pair operation. |
13465 | | static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, |
13466 | | LSBaseSDNode *LSNode2, SDValue BasePtr, |
13467 | 0 | uint64_t Imm) { |
13468 | 0 | SmallPtrSet<const SDNode *, 32> Visited; |
13469 | 0 | SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2}; |
13470 | |
13471 | 0 | if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) || |
13472 | 0 | SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist)) |
13473 | 0 | return SDValue(); |
13474 | | |
13475 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
13476 | 0 | const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); |
13477 | | |
13478 | | // The new operation has twice the width. |
13479 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
13480 | 0 | EVT MemVT = LSNode1->getMemoryVT(); |
13481 | 0 | EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128; |
13482 | 0 | MachineMemOperand *MMO = LSNode1->getMemOperand(); |
13483 | 0 | MachineMemOperand *NewMMO = MF.getMachineMemOperand( |
13484 | 0 | MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16); |
13485 | |
13486 | 0 | if (LSNode1->getOpcode() == ISD::LOAD) { |
13487 | 0 | auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType(); |
13488 | 0 | unsigned Opcode; |
13489 | 0 | if (MemVT == MVT::i32) |
13490 | 0 | Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD; |
13491 | 0 | else |
13492 | 0 | Opcode = RISCVISD::TH_LDD; |
13493 | |
13494 | 0 | SDValue Res = DAG.getMemIntrinsicNode( |
13495 | 0 | Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}), |
13496 | 0 | {LSNode1->getChain(), BasePtr, |
13497 | 0 | DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, |
13498 | 0 | NewMemVT, NewMMO); |
13499 | |
13500 | 0 | SDValue Node1 = |
13501 | 0 | DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1)); |
13502 | 0 | SDValue Node2 = |
13503 | 0 | DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2)); |
13504 | |
13505 | 0 | DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode()); |
13506 | 0 | return Node1; |
13507 | 0 | } else { |
13508 | 0 | unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD; |
13509 | |
13510 | 0 | SDValue Res = DAG.getMemIntrinsicNode( |
13511 | 0 | Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other), |
13512 | 0 | {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1), |
13513 | 0 | BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)}, |
13514 | 0 | NewMemVT, NewMMO); |
13515 | |
13516 | 0 | DAG.ReplaceAllUsesWith(LSNode2, Res.getNode()); |
13517 | 0 | return Res; |
13518 | 0 | } |
13519 | 0 | } |
13520 | | |
13521 | | // Try to combine two adjacent loads/stores to a single pair instruction from |
13522 | | // the XTHeadMemPair vendor extension. |
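      |      | // For instance (offsets assumed to satisfy the encoding constraints checked
      |      | // below), two simple i64 loads from Base+16 and Base+24 hanging off the same
      |      | // chain can be replaced by a single TH_LDD node that produces both values.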
13523 | | static SDValue performMemPairCombine(SDNode *N, |
13524 | 0 | TargetLowering::DAGCombinerInfo &DCI) { |
13525 | 0 | SelectionDAG &DAG = DCI.DAG; |
13526 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
13527 | 0 | const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>(); |
13528 | | |
13529 | | // Target does not support load/store pair. |
13530 | 0 | if (!Subtarget.hasVendorXTHeadMemPair()) |
13531 | 0 | return SDValue(); |
13532 | | |
13533 | 0 | LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N); |
13534 | 0 | EVT MemVT = LSNode1->getMemoryVT(); |
13535 | 0 | unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2; |
13536 | | |
13537 | | // No volatile, indexed or atomic loads/stores. |
13538 | 0 | if (!LSNode1->isSimple() || LSNode1->isIndexed()) |
13539 | 0 | return SDValue(); |
13540 | | |
13541 | | // Function to get a base + constant representation from a memory value. |
13542 | 0 | auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> { |
13543 | 0 | if (Ptr->getOpcode() == ISD::ADD) |
13544 | 0 | if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) |
13545 | 0 | return {Ptr->getOperand(0), C1->getZExtValue()}; |
13546 | 0 | return {Ptr, 0}; |
13547 | 0 | }; |
13548 | |
13549 | 0 | auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum)); |
13550 | |
13551 | 0 | SDValue Chain = N->getOperand(0); |
13552 | 0 | for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end(); |
13553 | 0 | UI != UE; ++UI) { |
13554 | 0 | SDUse &Use = UI.getUse(); |
13555 | 0 | if (Use.getUser() != N && Use.getResNo() == 0 && |
13556 | 0 | Use.getUser()->getOpcode() == N->getOpcode()) { |
13557 | 0 | LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser()); |
13558 | | |
13559 | | // No volatile, indexed or atomic loads/stores. |
13560 | 0 | if (!LSNode2->isSimple() || LSNode2->isIndexed()) |
13561 | 0 | continue; |
13562 | | |
13563 | | // Check if LSNode1 and LSNode2 have the same type and extension. |
13564 | 0 | if (LSNode1->getOpcode() == ISD::LOAD) |
13565 | 0 | if (cast<LoadSDNode>(LSNode2)->getExtensionType() != |
13566 | 0 | cast<LoadSDNode>(LSNode1)->getExtensionType()) |
13567 | 0 | continue; |
13568 | | |
13569 | 0 | if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT()) |
13570 | 0 | continue; |
13571 | | |
13572 | 0 | auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum)); |
13573 | | |
13574 | | // Check if the base pointer is the same for both instructions. |
13575 | 0 | if (Base1 != Base2) |
13576 | 0 | continue; |
13577 | | |
13578 | | // Check if the offsets match the XTHeadMemPair encoding constraints. |
13579 | 0 | bool Valid = false; |
13580 | 0 | if (MemVT == MVT::i32) { |
13581 | | // Check for adjacent i32 values and a 2-bit index. |
13582 | 0 | if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1)) |
13583 | 0 | Valid = true; |
13584 | 0 | } else if (MemVT == MVT::i64) { |
13585 | | // Check for adjacent i64 values and a 2-bit index. |
13586 | 0 | if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1)) |
13587 | 0 | Valid = true; |
13588 | 0 | } |
13589 | |
13590 | 0 | if (!Valid) |
13591 | 0 | continue; |
13592 | | |
13593 | | // Try to combine. |
13594 | 0 | if (SDValue Res = |
13595 | 0 | tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1)) |
13596 | 0 | return Res; |
13597 | 0 | } |
13598 | 0 | } |
13599 | | |
13600 | 0 | return SDValue(); |
13601 | 0 | } |
13602 | | |
13603 | | // Fold |
13604 | | // (fp_to_int (froundeven X)) -> fcvt X, rne |
13605 | | // (fp_to_int (ftrunc X)) -> fcvt X, rtz |
13606 | | // (fp_to_int (ffloor X)) -> fcvt X, rdn |
13607 | | // (fp_to_int (fceil X)) -> fcvt X, rup |
13608 | | // (fp_to_int (fround X)) -> fcvt X, rmm |
13609 | | // (fp_to_int (frint X)) -> fcvt X |
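      |      | // For example, on RV64 (fp_to_sint i64 (ffloor X:f64)) becomes a single
      |      | // FCVT_X node carrying the static rounding mode rdn.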
13610 | | static SDValue performFP_TO_INTCombine(SDNode *N, |
13611 | | TargetLowering::DAGCombinerInfo &DCI, |
13612 | 0 | const RISCVSubtarget &Subtarget) { |
13613 | 0 | SelectionDAG &DAG = DCI.DAG; |
13614 | 0 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
13615 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
13616 | |
13617 | 0 | SDValue Src = N->getOperand(0); |
13618 | | |
13619 | | // Don't do this for strict-fp Src. |
13620 | 0 | if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) |
13621 | 0 | return SDValue(); |
13622 | | |
13623 | | // Ensure the FP type is legal. |
13624 | 0 | if (!TLI.isTypeLegal(Src.getValueType())) |
13625 | 0 | return SDValue(); |
13626 | | |
13627 | | // Don't do this for f16 with Zfhmin and not Zfh. |
13628 | 0 | if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) |
13629 | 0 | return SDValue(); |
13630 | | |
13631 | 0 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); |
13632 | | // If the result is invalid, we didn't find a foldable instruction. |
13633 | 0 | if (FRM == RISCVFPRndMode::Invalid) |
13634 | 0 | return SDValue(); |
13635 | | |
13636 | 0 | SDLoc DL(N); |
13637 | 0 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; |
13638 | 0 | EVT VT = N->getValueType(0); |
13639 | |
13640 | 0 | if (VT.isVector() && TLI.isTypeLegal(VT)) { |
13641 | 0 | MVT SrcVT = Src.getSimpleValueType(); |
13642 | 0 | MVT SrcContainerVT = SrcVT; |
13643 | 0 | MVT ContainerVT = VT.getSimpleVT(); |
13644 | 0 | SDValue XVal = Src.getOperand(0); |
13645 | | |
13646 | | // For widening and narrowing conversions we just combine it into a |
13647 | | // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They |
13648 | | // end up getting lowered to their appropriate pseudo instructions based on |
13649 | | // their operand types. |
13650 | 0 | if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 || |
13651 | 0 | VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits()) |
13652 | 0 | return SDValue(); |
13653 | | |
13654 | | // Make fixed-length vectors scalable first |
13655 | 0 | if (SrcVT.isFixedLengthVector()) { |
13656 | 0 | SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); |
13657 | 0 | XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget); |
13658 | 0 | ContainerVT = |
13659 | 0 | getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); |
13660 | 0 | } |
13661 | |
13662 | 0 | auto [Mask, VL] = |
13663 | 0 | getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget); |
13664 | |
13665 | 0 | SDValue FpToInt; |
13666 | 0 | if (FRM == RISCVFPRndMode::RTZ) { |
13667 | | // Use the dedicated trunc static rounding mode if we're truncating so we |
13668 | | // don't need to generate calls to fsrmi/fsrm |
13669 | 0 | unsigned Opc = |
13670 | 0 | IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; |
13671 | 0 | FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); |
13672 | 0 | } else if (FRM == RISCVFPRndMode::DYN) { |
13673 | 0 | unsigned Opc = |
13674 | 0 | IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL; |
13675 | 0 | FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); |
13676 | 0 | } else { |
13677 | 0 | unsigned Opc = |
13678 | 0 | IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; |
13679 | 0 | FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, |
13680 | 0 | DAG.getTargetConstant(FRM, DL, XLenVT), VL); |
13681 | 0 | } |
13682 | | |
13683 | | // If converted from fixed-length to scalable, convert back |
13684 | 0 | if (VT.isFixedLengthVector()) |
13685 | 0 | FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget); |
13686 | |
13687 | 0 | return FpToInt; |
13688 | 0 | } |
13689 | | |
13690 | | // Only handle XLen or i32 types. Other types narrower than XLen will |
13691 | | // eventually be legalized to XLenVT. |
13692 | 0 | if (VT != MVT::i32 && VT != XLenVT) |
13693 | 0 | return SDValue(); |
13694 | | |
13695 | 0 | unsigned Opc; |
13696 | 0 | if (VT == XLenVT) |
13697 | 0 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
13698 | 0 | else |
13699 | 0 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
13700 | |
13701 | 0 | SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0), |
13702 | 0 | DAG.getTargetConstant(FRM, DL, XLenVT)); |
13703 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt); |
13704 | 0 | } |
13705 | | |
13706 | | // Fold |
13707 | | // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) |
13708 | | // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) |
13709 | | // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) |
13710 | | // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) |
13711 | | // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) |
13712 | | // (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn)) |
13713 | | static SDValue performFP_TO_INT_SATCombine(SDNode *N, |
13714 | | TargetLowering::DAGCombinerInfo &DCI, |
13715 | 0 | const RISCVSubtarget &Subtarget) { |
13716 | 0 | SelectionDAG &DAG = DCI.DAG; |
13717 | 0 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
13718 | 0 | MVT XLenVT = Subtarget.getXLenVT(); |
13719 | | |
13720 | | // Only handle XLen types. Other types narrower than XLen will eventually be |
13721 | | // legalized to XLenVT. |
13722 | 0 | EVT DstVT = N->getValueType(0); |
13723 | 0 | if (DstVT != XLenVT) |
13724 | 0 | return SDValue(); |
13725 | | |
13726 | 0 | SDValue Src = N->getOperand(0); |
13727 | | |
13728 | | // Don't do this for strict-fp Src. |
13729 | 0 | if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode()) |
13730 | 0 | return SDValue(); |
13731 | | |
13732 | | // Ensure the FP type is also legal. |
13733 | 0 | if (!TLI.isTypeLegal(Src.getValueType())) |
13734 | 0 | return SDValue(); |
13735 | | |
13736 | | // Don't do this for f16 with Zfhmin and not Zfh. |
13737 | 0 | if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) |
13738 | 0 | return SDValue(); |
13739 | | |
13740 | 0 | EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); |
13741 | |
13742 | 0 | RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); |
13743 | 0 | if (FRM == RISCVFPRndMode::Invalid) |
13744 | 0 | return SDValue(); |
13745 | | |
13746 | 0 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; |
13747 | |
13748 | 0 | unsigned Opc; |
13749 | 0 | if (SatVT == DstVT) |
13750 | 0 | Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; |
13751 | 0 | else if (DstVT == MVT::i64 && SatVT == MVT::i32) |
13752 | 0 | Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; |
13753 | 0 | else |
13754 | 0 | return SDValue(); |
13755 | | // FIXME: Support other SatVTs by clamping before or after the conversion. |
13756 | | |
13757 | 0 | Src = Src.getOperand(0); |
13758 | |
13759 | 0 | SDLoc DL(N); |
13760 | 0 | SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src, |
13761 | 0 | DAG.getTargetConstant(FRM, DL, XLenVT)); |
13762 | | |
13763 | | // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero |
13764 | | // extend. |
13765 | 0 | if (Opc == RISCVISD::FCVT_WU_RV64) |
13766 | 0 | FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); |
13767 | | |
13768 | | // RISC-V FP-to-int conversions saturate to the destination register size, but |
13769 | | // don't produce 0 for nan. |
13770 | 0 | SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); |
13771 | 0 | return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); |
13772 | 0 | } |
13773 | | |
13774 | | // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is |
13775 | | // smaller than XLenVT. |
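      |      | // For an i16 value, swapping the two bytes and then reversing all 16 bits is
      |      | // the same as reversing the bits within each byte, which is what BREV8 does.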
13776 | | static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, |
13777 | 0 | const RISCVSubtarget &Subtarget) { |
13778 | 0 | assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); |
13779 | | |
13780 | 0 | SDValue Src = N->getOperand(0); |
13781 | 0 | if (Src.getOpcode() != ISD::BSWAP) |
13782 | 0 | return SDValue(); |
13783 | | |
13784 | 0 | EVT VT = N->getValueType(0); |
13785 | 0 | if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || |
13786 | 0 | !llvm::has_single_bit<uint32_t>(VT.getSizeInBits())) |
13787 | 0 | return SDValue(); |
13788 | | |
13789 | 0 | SDLoc DL(N); |
13790 | 0 | return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0)); |
13791 | 0 | } |
13792 | | |
13793 | | // Convert from one FMA opcode to another based on whether we are negating the |
13794 | | // multiply result and/or the accumulator. |
13795 | | // NOTE: Only supports RVV operations with VL. |
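      |      | // For example, VFMADD_VL conceptually computes (a * b) + c; negating the
      |      | // multiply result gives -(a * b) + c, i.e. VFNMSUB_VL, and additionally
      |      | // negating the accumulator gives -(a * b) - c, i.e. VFNMADD_VL.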
13796 | 0 | static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { |
13797 | | // Negating the multiply result changes ADD<->SUB and toggles 'N'. |
13798 | 0 | if (NegMul) { |
13799 | | // clang-format off |
13800 | 0 | switch (Opcode) { |
13801 | 0 | default: llvm_unreachable("Unexpected opcode"); |
13802 | 0 | case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; |
13803 | 0 | case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; |
13804 | 0 | case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; |
13805 | 0 | case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; |
13806 | 0 | case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; |
13807 | 0 | case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; |
13808 | 0 | case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; |
13809 | 0 | case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; |
13810 | 0 | } |
13811 | | // clang-format on |
13812 | 0 | } |
13813 | | |
13814 | | // Negating the accumulator changes ADD<->SUB. |
13815 | 0 | if (NegAcc) { |
13816 | | // clang-format off |
13817 | 0 | switch (Opcode) { |
13818 | 0 | default: llvm_unreachable("Unexpected opcode"); |
13819 | 0 | case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; |
13820 | 0 | case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; |
13821 | 0 | case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; |
13822 | 0 | case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; |
13823 | 0 | case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break; |
13824 | 0 | case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break; |
13825 | 0 | case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break; |
13826 | 0 | case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break; |
13827 | 0 | } |
13828 | | // clang-format on |
13829 | 0 | } |
13830 | | |
13831 | 0 | return Opcode; |
13832 | 0 | } |
13833 | | |
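      |      | // A sketch of the FNEG_VL folding performed below (mask and VL elided; the
      |      | // FNEG_VL must use the same mask and VL as the FMA node):
      |      | //   (vfmadd_vl (fneg_vl a), b, c) -> (vfnmsub_vl a, b, c)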
13834 | 0 | static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) { |
13835 | | // Fold FNEG_VL into FMA opcodes. |
13836 | | // The first operand of strict-fp is chain. |
13837 | 0 | unsigned Offset = N->isTargetStrictFPOpcode(); |
13838 | 0 | SDValue A = N->getOperand(0 + Offset); |
13839 | 0 | SDValue B = N->getOperand(1 + Offset); |
13840 | 0 | SDValue C = N->getOperand(2 + Offset); |
13841 | 0 | SDValue Mask = N->getOperand(3 + Offset); |
13842 | 0 | SDValue VL = N->getOperand(4 + Offset); |
13843 | |
13844 | 0 | auto invertIfNegative = [&Mask, &VL](SDValue &V) { |
13845 | 0 | if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask && |
13846 | 0 | V.getOperand(2) == VL) { |
13847 | | // Return the negated input. |
13848 | 0 | V = V.getOperand(0); |
13849 | 0 | return true; |
13850 | 0 | } |
13851 | | |
13852 | 0 | return false; |
13853 | 0 | }; |
13854 | |
13855 | 0 | bool NegA = invertIfNegative(A); |
13856 | 0 | bool NegB = invertIfNegative(B); |
13857 | 0 | bool NegC = invertIfNegative(C); |
13858 | | |
13859 | | // If no operands are negated, we're done. |
13860 | 0 | if (!NegA && !NegB && !NegC) |
13861 | 0 | return SDValue(); |
13862 | | |
13863 | 0 | unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); |
13864 | 0 | if (N->isTargetStrictFPOpcode()) |
13865 | 0 | return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(), |
13866 | 0 | {N->getOperand(0), A, B, C, Mask, VL}); |
13867 | 0 | return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask, |
13868 | 0 | VL); |
13869 | 0 | } |
13870 | | |
13871 | | static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, |
13872 | 0 | const RISCVSubtarget &Subtarget) { |
13873 | 0 | if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG)) |
13874 | 0 | return V; |
13875 | | |
13876 | 0 | if (N->getValueType(0).isScalableVector() && |
13877 | 0 | N->getValueType(0).getVectorElementType() == MVT::f32 && |
13878 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
13879 | 0 | !Subtarget.hasVInstructionsF16())) { |
13880 | 0 | return SDValue(); |
13881 | 0 | } |
13882 | | |
13883 | | // FIXME: Ignore strict opcodes for now. |
13884 | 0 | if (N->isTargetStrictFPOpcode()) |
13885 | 0 | return SDValue(); |
13886 | | |
13887 | | // Try to form widening FMA. |
13888 | 0 | SDValue Op0 = N->getOperand(0); |
13889 | 0 | SDValue Op1 = N->getOperand(1); |
13890 | 0 | SDValue Mask = N->getOperand(3); |
13891 | 0 | SDValue VL = N->getOperand(4); |
13892 | |
13893 | 0 | if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || |
13894 | 0 | Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) |
13895 | 0 | return SDValue(); |
13896 | | |
13897 | | // TODO: Refactor to handle more complex cases similar to |
13898 | | // combineBinOp_VLToVWBinOp_VL. |
13899 | 0 | if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && |
13900 | 0 | (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) |
13901 | 0 | return SDValue(); |
13902 | | |
13903 | | // Check the mask and VL are the same. |
13904 | 0 | if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || |
13905 | 0 | Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) |
13906 | 0 | return SDValue(); |
13907 | | |
13908 | 0 | unsigned NewOpc; |
13909 | 0 | switch (N->getOpcode()) { |
13910 | 0 | default: |
13911 | 0 | llvm_unreachable("Unexpected opcode"); |
13912 | 0 | case RISCVISD::VFMADD_VL: |
13913 | 0 | NewOpc = RISCVISD::VFWMADD_VL; |
13914 | 0 | break; |
13915 | 0 | case RISCVISD::VFNMSUB_VL: |
13916 | 0 | NewOpc = RISCVISD::VFWNMSUB_VL; |
13917 | 0 | break; |
13918 | 0 | case RISCVISD::VFNMADD_VL: |
13919 | 0 | NewOpc = RISCVISD::VFWNMADD_VL; |
13920 | 0 | break; |
13921 | 0 | case RISCVISD::VFMSUB_VL: |
13922 | 0 | NewOpc = RISCVISD::VFWMSUB_VL; |
13923 | 0 | break; |
13924 | 0 | } |
13925 | | |
13926 | 0 | Op0 = Op0.getOperand(0); |
13927 | 0 | Op1 = Op1.getOperand(0); |
13928 | |
13929 | 0 | return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1, |
13930 | 0 | N->getOperand(2), Mask, VL); |
13931 | 0 | } |
13932 | | |
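      |      | // A sketch of the fold performed below: a VL floating-point multiply whose
      |      | // operands are both fp_extend_vl nodes (sharing the multiply's mask and VL)
      |      | // is rewritten as a single vfwmul_vl of the narrow sources.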
13933 | | static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG, |
13934 | 0 | const RISCVSubtarget &Subtarget) { |
13935 | 0 | if (N->getValueType(0).isScalableVector() && |
13936 | 0 | N->getValueType(0).getVectorElementType() == MVT::f32 && |
13937 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
13938 | 0 | !Subtarget.hasVInstructionsF16())) { |
13939 | 0 | return SDValue(); |
13940 | 0 | } |
13941 | | |
13942 | | // FIXME: Ignore strict opcodes for now. |
13943 | 0 | assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode"); |
13944 | | |
13945 | | // Try to form widening multiply. |
13946 | 0 | SDValue Op0 = N->getOperand(0); |
13947 | 0 | SDValue Op1 = N->getOperand(1); |
13948 | 0 | SDValue Merge = N->getOperand(2); |
13949 | 0 | SDValue Mask = N->getOperand(3); |
13950 | 0 | SDValue VL = N->getOperand(4); |
13951 | |
13952 | 0 | if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL || |
13953 | 0 | Op1.getOpcode() != RISCVISD::FP_EXTEND_VL) |
13954 | 0 | return SDValue(); |
13955 | | |
13956 | | // TODO: Refactor to handle more complex cases similar to |
13957 | | // combineBinOp_VLToVWBinOp_VL. |
13958 | 0 | if ((!Op0.hasOneUse() || !Op1.hasOneUse()) && |
13959 | 0 | (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0))) |
13960 | 0 | return SDValue(); |
13961 | | |
13962 | | // Check the mask and VL are the same. |
13963 | 0 | if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL || |
13964 | 0 | Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) |
13965 | 0 | return SDValue(); |
13966 | | |
13967 | 0 | Op0 = Op0.getOperand(0); |
13968 | 0 | Op1 = Op1.getOperand(0); |
13969 | |
13970 | 0 | return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0, |
13971 | 0 | Op1, Merge, Mask, VL); |
13972 | 0 | } |
13973 | | |
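      |      | // A sketch of the folds performed below (merge, mask and VL elided):
      |      | //   fadd_vl (fp_extend_vl a), (fp_extend_vl b) -> vfwadd_vl a, b
      |      | //   fadd_vl a, (fp_extend_vl b)                -> vfwadd_w_vl a, b
      |      | // The VL FP subtract is handled the same way, except that operand 1 must be
      |      | // the extend.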
13974 | | static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG, |
13975 | 0 | const RISCVSubtarget &Subtarget) { |
13976 | 0 | if (N->getValueType(0).isScalableVector() && |
13977 | 0 | N->getValueType(0).getVectorElementType() == MVT::f32 && |
13978 | 0 | (Subtarget.hasVInstructionsF16Minimal() && |
13979 | 0 | !Subtarget.hasVInstructionsF16())) { |
13980 | 0 | return SDValue(); |
13981 | 0 | } |
13982 | | |
13983 | 0 | SDValue Op0 = N->getOperand(0); |
13984 | 0 | SDValue Op1 = N->getOperand(1); |
13985 | 0 | SDValue Merge = N->getOperand(2); |
13986 | 0 | SDValue Mask = N->getOperand(3); |
13987 | 0 | SDValue VL = N->getOperand(4); |
13988 | |
13989 | 0 | bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL; |
13990 | | |
13991 | | // Look for foldable FP_EXTENDS. |
13992 | 0 | bool Op0IsExtend = |
13993 | 0 | Op0.getOpcode() == RISCVISD::FP_EXTEND_VL && |
13994 | 0 | (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0))); |
13995 | 0 | bool Op1IsExtend = |
13996 | 0 | (Op0 == Op1 && Op0IsExtend) || |
13997 | 0 | (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse()); |
13998 | | |
13999 | | // Check the mask and VL. |
14000 | 0 | if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)) |
14001 | 0 | Op0IsExtend = false; |
14002 | 0 | if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)) |
14003 | 0 | Op1IsExtend = false; |
14004 | | |
14005 | | // Canonicalize. |
14006 | 0 | if (!Op1IsExtend) { |
14007 | | // Sub requires at least operand 1 to be an extend. |
14008 | 0 | if (!IsAdd) |
14009 | 0 | return SDValue(); |
14010 | | |
14011 | | // Add is commutable; if the other operand is foldable, swap them. |
14012 | 0 | if (!Op0IsExtend) |
14013 | 0 | return SDValue(); |
14014 | | |
14015 | 0 | std::swap(Op0, Op1); |
14016 | 0 | std::swap(Op0IsExtend, Op1IsExtend); |
14017 | 0 | } |
14018 | | |
14019 | | // Op1 is a foldable extend. Op0 might be foldable. |
14020 | 0 | Op1 = Op1.getOperand(0); |
14021 | 0 | if (Op0IsExtend) |
14022 | 0 | Op0 = Op0.getOperand(0); |
14023 | |
14024 | 0 | unsigned Opc; |
14025 | 0 | if (IsAdd) |
14026 | 0 | Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL; |
14027 | 0 | else |
14028 | 0 | Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL; |
14029 | |
14030 | 0 | return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask, |
14031 | 0 | VL); |
14032 | 0 | } |
14033 | | |
14034 | | static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, |
14035 | 11.5k | const RISCVSubtarget &Subtarget) { |
14036 | 11.5k | assert(N->getOpcode() == ISD::SRA && "Unexpected opcode"); |
14037 | | |
14038 | 11.5k | if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) |
14039 | 3.33k | return SDValue(); |
14040 | | |
14041 | 8.16k | if (!isa<ConstantSDNode>(N->getOperand(1))) |
14042 | 3.49k | return SDValue(); |
14043 | 4.66k | uint64_t ShAmt = N->getConstantOperandVal(1); |
14044 | 4.66k | if (ShAmt > 32) |
14045 | 4.57k | return SDValue(); |
14046 | | |
14047 | 89 | SDValue N0 = N->getOperand(0); |
14048 | | |
14049 | | // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> |
14050 | | // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of |
14051 | | // SLLIW+SRAIW. SLLI+SRAI have compressed forms. |
14052 | 89 | if (ShAmt < 32 && |
14053 | 89 | N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() && |
14054 | 89 | cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 && |
14055 | 89 | N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() && |
14056 | 89 | isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { |
14057 | 0 | uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1); |
14058 | 0 | if (LShAmt < 32) { |
14059 | 0 | SDLoc ShlDL(N0.getOperand(0)); |
14060 | 0 | SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64, |
14061 | 0 | N0.getOperand(0).getOperand(0), |
14062 | 0 | DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64)); |
14063 | 0 | SDLoc DL(N); |
14064 | 0 | return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl, |
14065 | 0 | DAG.getConstant(ShAmt + 32, DL, MVT::i64)); |
14066 | 0 | } |
14067 | 0 | } |
14068 | | |
14069 | | // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) |
14070 | | // FIXME: Should this be a generic combine? There's a similar combine on X86. |
14071 | | // |
14072 | | // Also try these folds where an add or sub is in the middle. |
14073 | | // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C) |
14074 | | // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C) |
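      |      | // For instance, with C = 6: (sra (shl X, 32), 26) -> (shl (sext_inreg X, i32), 6).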
14075 | 89 | SDValue Shl; |
14076 | 89 | ConstantSDNode *AddC = nullptr; |
14077 | | |
14078 | | // We might have an ADD or SUB between the SRA and SHL. |
14079 | 89 | bool IsAdd = N0.getOpcode() == ISD::ADD; |
14080 | 89 | if ((IsAdd || N0.getOpcode() == ISD::SUB)) { |
14081 | | // Other operand needs to be a constant we can modify. |
14082 | 37 | AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0)); |
14083 | 37 | if (!AddC) |
14084 | 34 | return SDValue(); |
14085 | | |
14086 | | // AddC needs to have at least 32 trailing zeros. |
14087 | 3 | if (AddC->getAPIntValue().countr_zero() < 32) |
14088 | 3 | return SDValue(); |
14089 | | |
14090 | | // All users should be a shift by constant less than or equal to 32. This |
14091 | | // ensures we'll do this optimization for each of them to produce an |
14092 | | // add/sub+sext_inreg they can all share. |
14093 | 0 | for (SDNode *U : N0->uses()) { |
14094 | 0 | if (U->getOpcode() != ISD::SRA || |
14095 | 0 | !isa<ConstantSDNode>(U->getOperand(1)) || |
14096 | 0 | U->getConstantOperandVal(1) > 32) |
14097 | 0 | return SDValue(); |
14098 | 0 | } |
14099 | | |
14100 | 0 | Shl = N0.getOperand(IsAdd ? 0 : 1); |
14101 | 52 | } else { |
14102 | | // Not an ADD or SUB. |
14103 | 52 | Shl = N0; |
14104 | 52 | } |
14105 | | |
14106 | | // Look for a shift left by 32. |
14107 | 52 | if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) || |
14108 | 52 | Shl.getConstantOperandVal(1) != 32) |
14109 | 52 | return SDValue(); |
14110 | | |
14111 | | // If we didn't look through an add/sub, then the shl should have one use. |
14112 | | // If we did look through an add/sub, the sext_inreg we create is free so |
14113 | | // we're only creating 2 new instructions. It's enough to only remove the |
14114 | | // original sra+add/sub. |
14115 | 0 | if (!AddC && !Shl.hasOneUse()) |
14116 | 0 | return SDValue(); |
14117 | | |
14118 | 0 | SDLoc DL(N); |
14119 | 0 | SDValue In = Shl.getOperand(0); |
14120 | | |
14121 | | // If we looked through an ADD or SUB, we need to rebuild it with the shifted |
14122 | | // constant. |
14123 | 0 | if (AddC) { |
14124 | 0 | SDValue ShiftedAddC = |
14125 | 0 | DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64); |
14126 | 0 | if (IsAdd) |
14127 | 0 | In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC); |
14128 | 0 | else |
14129 | 0 | In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In); |
14130 | 0 | } |
14131 | |
14132 | 0 | SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In, |
14133 | 0 | DAG.getValueType(MVT::i32)); |
14134 | 0 | if (ShAmt == 32) |
14135 | 0 | return SExt; |
14136 | | |
14137 | 0 | return DAG.getNode( |
14138 | 0 | ISD::SHL, DL, MVT::i64, SExt, |
14139 | 0 | DAG.getConstant(32 - ShAmt, DL, MVT::i64)); |
14140 | 0 | } |
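A minimal standalone sketch (not part of this file) of why the (sra (shl X, 32), 32 - C) fold above is sound on i64 values. The helper names viaShifts and viaSextInreg are hypothetical, and two's-complement C++20 semantics for the signed conversions and right shifts are assumed.

#include <cassert>
#include <cstdint>

// Pre-combine form: (sra (shl X, 32), 32 - C), with 0 <= C < 32.
static int64_t viaShifts(int64_t X, unsigned C) {
  int64_t Shl = static_cast<int64_t>(static_cast<uint64_t>(X) << 32);
  return Shl >> (32 - C);
}

// Post-combine form: (shl (sext_inreg X, i32), C).
static int64_t viaSextInreg(int64_t X, unsigned C) {
  int64_t Sext = static_cast<int32_t>(static_cast<uint32_t>(X)); // sext_inreg i32
  return static_cast<int64_t>(static_cast<uint64_t>(Sext) << C);
}

int main() {
  for (int64_t X : {0x123456789abcdef0LL, -1LL, 42LL})
    for (unsigned C = 0; C < 32; ++C)
      assert(viaShifts(X, C) == viaSextInreg(X, C));
}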
14141 | | |
14142 | | // Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
14143 | | // the result is used as the condition of a br_cc or select_cc we can invert,
14144 | | // inverting the setcc is free, and Z is 0/1. Caller will invert the |
14145 | | // br_cc/select_cc. |
14146 | 2.39k | static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) { |
14147 | 2.39k | bool IsAnd = Cond.getOpcode() == ISD::AND; |
14148 | 2.39k | if (!IsAnd && Cond.getOpcode() != ISD::OR) |
14149 | 1.00k | return SDValue(); |
14150 | | |
14151 | 1.39k | if (!Cond.hasOneUse()) |
14152 | 480 | return SDValue(); |
14153 | | |
14154 | 918 | SDValue Setcc = Cond.getOperand(0); |
14155 | 918 | SDValue Xor = Cond.getOperand(1); |
14156 | | // Canonicalize setcc to LHS. |
14157 | 918 | if (Setcc.getOpcode() != ISD::SETCC) |
14158 | 902 | std::swap(Setcc, Xor); |
14159 | | // LHS should be a setcc and RHS should be an xor. |
14160 | 918 | if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() || |
14161 | 918 | Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) |
14162 | 918 | return SDValue(); |
14163 | | |
14164 | | // If the condition is an And, SimplifyDemandedBits may have changed |
14165 | | // (xor Z, 1) to (not Z). |
14166 | 0 | SDValue Xor1 = Xor.getOperand(1); |
14167 | 0 | if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1))) |
14168 | 0 | return SDValue(); |
14169 | | |
14170 | 0 | EVT VT = Cond.getValueType(); |
14171 | 0 | SDValue Xor0 = Xor.getOperand(0); |
14172 | | |
14173 | | // The LHS of the xor needs to be 0/1. |
14174 | 0 | APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); |
14175 | 0 | if (!DAG.MaskedValueIsZero(Xor0, Mask)) |
14176 | 0 | return SDValue(); |
14177 | | |
14178 | | // We can only invert integer setccs. |
14179 | 0 | EVT SetCCOpVT = Setcc.getOperand(0).getValueType(); |
14180 | 0 | if (!SetCCOpVT.isScalarInteger()) |
14181 | 0 | return SDValue(); |
14182 | | |
14183 | 0 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get(); |
14184 | 0 | if (ISD::isIntEqualitySetCC(CCVal)) { |
14185 | 0 | CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); |
14186 | 0 | Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0), |
14187 | 0 | Setcc.getOperand(1), CCVal); |
14188 | 0 | } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) { |
14189 | | // Invert (setlt 0, X) by converting to (setlt X, 1). |
14190 | 0 | Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1), |
14191 | 0 | DAG.getConstant(1, SDLoc(Setcc), VT), CCVal); |
14192 | 0 | } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) { |
14193 | | // (setlt X, 1) by converting to (setlt 0, X). |
14194 | 0 | Setcc = DAG.getSetCC(SDLoc(Setcc), VT, |
14195 | 0 | DAG.getConstant(0, SDLoc(Setcc), VT), |
14196 | 0 | Setcc.getOperand(0), CCVal); |
14197 | 0 | } else |
14198 | 0 | return SDValue(); |
14199 | | |
14200 | 0 | unsigned Opc = IsAnd ? ISD::OR : ISD::AND; |
14201 | 0 | return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0)); |
14202 | 0 | } |
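tryDemorganOfBooleanCondition is a De Morgan rewrite over 0/1 values: with A the setcc result and Z known to be 0/1, (and A, (xor Z, 1)) is the inversion of (or (not A), Z), and symmetrically for the or case, which is why the caller has to invert the surrounding br_cc/select_cc. A small truth-table check of that identity, offered only as a sketch and not part of this file:

#include <cassert>

int main() {
  for (int A : {0, 1})     // the (invertible) setcc result
    for (int Z : {0, 1}) { // the 0/1 value feeding (xor Z, 1)
      // (and A, (xor Z, 1)) == not (or (not A), Z)
      assert((A & (Z ^ 1)) == (((A ^ 1) | Z) ^ 1));
      // (or A, (xor Z, 1)) == not (and (not A), Z)
      assert((A | (Z ^ 1)) == (((A ^ 1) & Z) ^ 1));
    }
}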
14203 | | |
14204 | | // Perform common combines for BR_CC and SELECT_CC conditions.
14205 | | static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, |
14206 | 3.24k | SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { |
14207 | 3.24k | ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); |
14208 | | |
14209 | | // An arithmetic right shift always preserves the sign, so the
14210 | | // shift can be omitted.
14211 | | // Fold setlt (sra X, N), 0 -> setlt X, 0 and |
14212 | | // setge (sra X, N), 0 -> setge X, 0 |
14213 | 3.24k | if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) && |
14214 | 3.24k | LHS.getOpcode() == ISD::SRA) { |
14215 | 0 | LHS = LHS.getOperand(0); |
14216 | 0 | return true; |
14217 | 0 | } |
14218 | | |
14219 | 3.24k | if (!ISD::isIntEqualitySetCC(CCVal)) |
14220 | 724 | return false; |
14221 | | |
14222 | | // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt) |
14223 | | // Sometimes the setcc is introduced after br_cc/select_cc has been formed. |
14224 | 2.52k | if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && |
14225 | 2.52k | LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { |
14226 | | // If we're looking for eq 0 instead of ne 0, we need to invert the |
14227 | | // condition. |
14228 | 1 | bool Invert = CCVal == ISD::SETEQ; |
14229 | 1 | CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); |
14230 | 1 | if (Invert) |
14231 | 0 | CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); |
14232 | | |
14233 | 1 | RHS = LHS.getOperand(1); |
14234 | 1 | LHS = LHS.getOperand(0); |
14235 | 1 | translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); |
14236 | | |
14237 | 1 | CC = DAG.getCondCode(CCVal); |
14238 | 1 | return true; |
14239 | 1 | } |
14240 | | |
14241 | | // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) |
14242 | 2.52k | if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) { |
14243 | 0 | RHS = LHS.getOperand(1); |
14244 | 0 | LHS = LHS.getOperand(0); |
14245 | 0 | return true; |
14246 | 0 | } |
14247 | | |
14248 | | // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) |
14249 | 2.52k | if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && |
14250 | 2.52k | LHS.getOperand(1).getOpcode() == ISD::Constant) { |
14251 | 0 | SDValue LHS0 = LHS.getOperand(0); |
14252 | 0 | if (LHS0.getOpcode() == ISD::AND && |
14253 | 0 | LHS0.getOperand(1).getOpcode() == ISD::Constant) { |
14254 | 0 | uint64_t Mask = LHS0.getConstantOperandVal(1); |
14255 | 0 | uint64_t ShAmt = LHS.getConstantOperandVal(1); |
14256 | 0 | if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) { |
14257 | 0 | CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; |
14258 | 0 | CC = DAG.getCondCode(CCVal); |
14259 | |
14260 | 0 | ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; |
14261 | 0 | LHS = LHS0.getOperand(0); |
14262 | 0 | if (ShAmt != 0) |
14263 | 0 | LHS = |
14264 | 0 | DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0), |
14265 | 0 | DAG.getConstant(ShAmt, DL, LHS.getValueType())); |
14266 | 0 | return true; |
14267 | 0 | } |
14268 | 0 | } |
14269 | 0 | } |
14270 | | |
14271 | | // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
14272 | | // This can occur when legalizing some floating point comparisons. |
14273 | 2.52k | APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); |
14274 | 2.52k | if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { |
14275 | 3 | CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); |
14276 | 3 | CC = DAG.getCondCode(CCVal); |
14277 | 3 | RHS = DAG.getConstant(0, DL, LHS.getValueType()); |
14278 | 3 | return true; |
14279 | 3 | } |
14280 | | |
14281 | 2.52k | if (isNullConstant(RHS)) { |
14282 | 2.39k | if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) { |
14283 | 0 | CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); |
14284 | 0 | CC = DAG.getCondCode(CCVal); |
14285 | 0 | LHS = NewCond; |
14286 | 0 | return true; |
14287 | 0 | } |
14288 | 2.39k | } |
14289 | | |
14290 | 2.52k | return false; |
14291 | 2.52k | } |
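One of the folds above turns a masked-bit test into a sign test: ((srl (and X, 1<<C), C), 0, ne) becomes ((shl X, XLen-1-C), 0, lt), since shifting bit C into the sign bit lets the comparison read it directly. A standalone sketch with XLen fixed at 64; the helper names are hypothetical and a two's-complement view of the i64 sign bit is assumed.

#include <cassert>
#include <cstdint>

// Pre-combine form: ((X & (1 << C)) >> C) != 0.
static bool viaAndSrl(uint64_t X, unsigned C) {
  return ((X & (1ULL << C)) >> C) != 0;
}

// Post-combine form: (X << (63 - C)) < 0 when viewed as signed.
static bool viaShlSignTest(uint64_t X, unsigned C) {
  return static_cast<int64_t>(X << (63 - C)) < 0;
}

int main() {
  for (unsigned C = 0; C < 64; ++C)
    for (uint64_t X : {0ULL, ~0ULL, 0x8000000000000001ULL})
      assert(viaAndSrl(X, C) == viaShlSignTest(X, C));
}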
14292 | | |
14293 | | // Fold |
14294 | | // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)). |
14295 | | // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)). |
14296 | | // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)). |
14297 | | // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)). |
14298 | | static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, |
14299 | | SDValue TrueVal, SDValue FalseVal, |
14300 | 4.73k | bool Swapped) { |
14301 | 4.73k | bool Commutative = true; |
14302 | 4.73k | unsigned Opc = TrueVal.getOpcode(); |
14303 | 4.73k | switch (Opc) { |
14304 | 4.70k | default: |
14305 | 4.70k | return SDValue(); |
14306 | 2 | case ISD::SHL: |
14307 | 2 | case ISD::SRA: |
14308 | 2 | case ISD::SRL: |
14309 | 6 | case ISD::SUB: |
14310 | 6 | Commutative = false; |
14311 | 6 | break; |
14312 | 10 | case ISD::ADD: |
14313 | 10 | case ISD::OR: |
14314 | 17 | case ISD::XOR: |
14315 | 17 | break; |
14316 | 4.73k | } |
14317 | | |
14318 | 23 | if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal)) |
14319 | 23 | return SDValue(); |
14320 | | |
14321 | 0 | unsigned OpToFold; |
14322 | 0 | if (FalseVal == TrueVal.getOperand(0)) |
14323 | 0 | OpToFold = 0; |
14324 | 0 | else if (Commutative && FalseVal == TrueVal.getOperand(1)) |
14325 | 0 | OpToFold = 1; |
14326 | 0 | else |
14327 | 0 | return SDValue(); |
14328 | | |
14329 | 0 | EVT VT = N->getValueType(0); |
14330 | 0 | SDLoc DL(N); |
14331 | 0 | SDValue OtherOp = TrueVal.getOperand(1 - OpToFold); |
14332 | 0 | EVT OtherOpVT = OtherOp->getValueType(0); |
14333 | 0 | SDValue IdentityOperand = |
14334 | 0 | DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags()); |
14335 | 0 | if (!Commutative) |
14336 | 0 | IdentityOperand = DAG.getConstant(0, DL, OtherOpVT); |
14337 | 0 | assert(IdentityOperand && "No identity operand!"); |
14338 | | |
14339 | 0 | if (Swapped) |
14340 | 0 | std::swap(OtherOp, IdentityOperand); |
14341 | 0 | SDValue NewSel = |
14342 | 0 | DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand); |
14343 | 0 | return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel); |
14344 | 0 | } |
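A scalar model of the select-into-op fold above for the ADD case: pulling the select inside the binop replaces the selected-away arm with the operation's identity value (0 for add, sub, or, xor and the shifts). This is only a sketch with hypothetical helper names, not part of this file.

#include <cassert>
#include <cstdint>

// Original form: (select C, (add Y, X), Y).
static int64_t selectOfAdd(bool C, int64_t Y, int64_t X) { return C ? Y + X : Y; }

// Folded form: (add Y, (select C, X, 0)).
static int64_t addOfSelect(bool C, int64_t Y, int64_t X) { return Y + (C ? X : 0); }

int main() {
  for (bool C : {false, true})
    for (int64_t X : {-5, 0, 7})
      assert(selectOfAdd(C, 100, X) == addOfSelect(C, 100, X));
}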
14345 | | |
14346 | | // This tries to get rid of `select` and `icmp` that are being used to handle |
14347 | | // `Targets` that do not support `cttz(0)`/`ctlz(0)`. |
14348 | 2.36k | static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { |
14349 | 2.36k | SDValue Cond = N->getOperand(0); |
14350 | | |
14351 | | // This represents either CTTZ or CTLZ instruction. |
14352 | 2.36k | SDValue CountZeroes; |
14353 | | |
14354 | 2.36k | SDValue ValOnZero; |
14355 | | |
14356 | 2.36k | if (Cond.getOpcode() != ISD::SETCC) |
14357 | 471 | return SDValue(); |
14358 | | |
14359 | 1.89k | if (!isNullConstant(Cond->getOperand(1))) |
14360 | 1.89k | return SDValue(); |
14361 | | |
14362 | 0 | ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get(); |
14363 | 0 | if (CCVal == ISD::CondCode::SETEQ) { |
14364 | 0 | CountZeroes = N->getOperand(2); |
14365 | 0 | ValOnZero = N->getOperand(1); |
14366 | 0 | } else if (CCVal == ISD::CondCode::SETNE) { |
14367 | 0 | CountZeroes = N->getOperand(1); |
14368 | 0 | ValOnZero = N->getOperand(2); |
14369 | 0 | } else { |
14370 | 0 | return SDValue(); |
14371 | 0 | } |
14372 | | |
14373 | 0 | if (CountZeroes.getOpcode() == ISD::TRUNCATE || |
14374 | 0 | CountZeroes.getOpcode() == ISD::ZERO_EXTEND) |
14375 | 0 | CountZeroes = CountZeroes.getOperand(0); |
14376 | |
14377 | 0 | if (CountZeroes.getOpcode() != ISD::CTTZ && |
14378 | 0 | CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF && |
14379 | 0 | CountZeroes.getOpcode() != ISD::CTLZ && |
14380 | 0 | CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF) |
14381 | 0 | return SDValue(); |
14382 | | |
14383 | 0 | if (!isNullConstant(ValOnZero)) |
14384 | 0 | return SDValue(); |
14385 | | |
14386 | 0 | SDValue CountZeroesArgument = CountZeroes->getOperand(0); |
14387 | 0 | if (Cond->getOperand(0) != CountZeroesArgument) |
14388 | 0 | return SDValue(); |
14389 | | |
14390 | 0 | if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { |
14391 | 0 | CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes), |
14392 | 0 | CountZeroes.getValueType(), CountZeroesArgument); |
14393 | 0 | } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) { |
14394 | 0 | CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes), |
14395 | 0 | CountZeroes.getValueType(), CountZeroesArgument); |
14396 | 0 | } |
14397 | |
14398 | 0 | unsigned BitWidth = CountZeroes.getValueSizeInBits(); |
14399 | 0 | SDValue BitWidthMinusOne = |
14400 | 0 | DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType()); |
14401 | |
14402 | 0 | auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(), |
14403 | 0 | CountZeroes, BitWidthMinusOne); |
14404 | 0 | return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0)); |
14405 | 0 | } |
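foldSelectOfCTTZOrCTLZ can drop the zero-guarding select because ISD::CTTZ/CTLZ are defined to return the operand's bit width for a zero input (and RISC-V's Zbb ctz/clz match that), so masking with BitWidth-1 maps that case to 0. A small 32-bit model of the equivalence; cttz32 is a hypothetical stand-in, not an LLVM API.

#include <cassert>
#include <cstdint>

// Models 32-bit cttz with cttz(0) == 32.
static uint32_t cttz32(uint32_t X) {
  if (X == 0)
    return 32;
  uint32_t N = 0;
  while ((X & 1) == 0) {
    X >>= 1;
    ++N;
  }
  return N;
}

int main() {
  for (uint32_t X : {0u, 1u, 8u, 0x80000000u}) {
    uint32_t Guarded = (X == 0) ? 0 : cttz32(X); // the select + icmp form
    uint32_t Folded = cttz32(X) & 31;            // the form the combine emits
    assert(Guarded == Folded);
  }
}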
14406 | | |
14407 | | static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, |
14408 | 2.36k | const RISCVSubtarget &Subtarget) { |
14409 | 2.36k | SDValue Cond = N->getOperand(0); |
14410 | 2.36k | SDValue True = N->getOperand(1); |
14411 | 2.36k | SDValue False = N->getOperand(2); |
14412 | 2.36k | SDLoc DL(N); |
14413 | 2.36k | EVT VT = N->getValueType(0); |
14414 | 2.36k | EVT CondVT = Cond.getValueType(); |
14415 | | |
14416 | 2.36k | if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) |
14417 | 471 | return SDValue(); |
14418 | | |
14419 | | // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
14420 | | // BEXTI, where C is a power of 2.
14421 | 1.89k | if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && |
14422 | 1.89k | (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) { |
14423 | 0 | SDValue LHS = Cond.getOperand(0); |
14424 | 0 | SDValue RHS = Cond.getOperand(1); |
14425 | 0 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
14426 | 0 | if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND && |
14427 | 0 | isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) { |
14428 | 0 | uint64_t MaskVal = LHS.getConstantOperandVal(1); |
14429 | 0 | if (isPowerOf2_64(MaskVal) && !isInt<12>(MaskVal)) |
14430 | 0 | return DAG.getSelect(DL, VT, |
14431 | 0 | DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE), |
14432 | 0 | False, True); |
14433 | 0 | } |
14434 | 0 | } |
14435 | 1.89k | return SDValue(); |
14436 | 1.89k | } |
14437 | | |
14438 | | static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, |
14439 | 2.36k | const RISCVSubtarget &Subtarget) { |
14440 | 2.36k | if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG)) |
14441 | 0 | return Folded; |
14442 | | |
14443 | 2.36k | if (SDValue V = useInversedSetcc(N, DAG, Subtarget)) |
14444 | 0 | return V; |
14445 | | |
14446 | 2.36k | if (Subtarget.hasConditionalMoveFusion()) |
14447 | 0 | return SDValue(); |
14448 | | |
14449 | 2.36k | SDValue TrueVal = N->getOperand(1); |
14450 | 2.36k | SDValue FalseVal = N->getOperand(2); |
14451 | 2.36k | if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false)) |
14452 | 0 | return V; |
14453 | 2.36k | return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true); |
14454 | 2.36k | } |
14455 | | |
14456 | | /// If we have a build_vector where each lane is binop X, C, where C |
14457 | | /// is a constant (but not necessarily the same constant on all lanes), |
14458 | | /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..). |
14459 | | /// We assume that materializing a constant build vector will be no more |
14460 | | /// expensive than performing O(n) binops.
14461 | | static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, |
14462 | | const RISCVSubtarget &Subtarget, |
14463 | 0 | const RISCVTargetLowering &TLI) { |
14464 | 0 | SDLoc DL(N); |
14465 | 0 | EVT VT = N->getValueType(0); |
14466 | |
14467 | 0 | assert(!VT.isScalableVector() && "unexpected build vector"); |
14468 | | |
14469 | 0 | if (VT.getVectorNumElements() == 1) |
14470 | 0 | return SDValue(); |
14471 | | |
14472 | 0 | const unsigned Opcode = N->op_begin()->getNode()->getOpcode(); |
14473 | 0 | if (!TLI.isBinOp(Opcode)) |
14474 | 0 | return SDValue(); |
14475 | | |
14476 | 0 | if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT)) |
14477 | 0 | return SDValue(); |
14478 | | |
14479 | 0 | SmallVector<SDValue> LHSOps; |
14480 | 0 | SmallVector<SDValue> RHSOps; |
14481 | 0 | for (SDValue Op : N->ops()) { |
14482 | 0 | if (Op.isUndef()) { |
14483 | | // We can't form a divide or remainder from undef. |
14484 | 0 | if (!DAG.isSafeToSpeculativelyExecute(Opcode)) |
14485 | 0 | return SDValue(); |
14486 | | |
14487 | 0 | LHSOps.push_back(Op); |
14488 | 0 | RHSOps.push_back(Op); |
14489 | 0 | continue; |
14490 | 0 | } |
14491 | | |
14492 | | // TODO: We can handle operations which have a neutral rhs value
14493 | | // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track |
14494 | | // of profit in a more explicit manner. |
14495 | 0 | if (Op.getOpcode() != Opcode || !Op.hasOneUse()) |
14496 | 0 | return SDValue(); |
14497 | | |
14498 | 0 | LHSOps.push_back(Op.getOperand(0)); |
14499 | 0 | if (!isa<ConstantSDNode>(Op.getOperand(1)) && |
14500 | 0 | !isa<ConstantFPSDNode>(Op.getOperand(1))) |
14501 | 0 | return SDValue(); |
14502 | | // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may |
14503 | | // have different LHS and RHS types. |
14504 | 0 | if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) |
14505 | 0 | return SDValue(); |
14506 | 0 | RHSOps.push_back(Op.getOperand(1)); |
14507 | 0 | } |
14508 | | |
14509 | 0 | return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps), |
14510 | 0 | DAG.getBuildVector(VT, DL, RHSOps)); |
14511 | 0 | } |
14512 | | |
14513 | | static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, |
14514 | | const RISCVSubtarget &Subtarget, |
14515 | 0 | const RISCVTargetLowering &TLI) { |
14516 | 0 | SDValue InVec = N->getOperand(0); |
14517 | 0 | SDValue InVal = N->getOperand(1); |
14518 | 0 | SDValue EltNo = N->getOperand(2); |
14519 | 0 | SDLoc DL(N); |
14520 | |
14521 | 0 | EVT VT = InVec.getValueType(); |
14522 | 0 | if (VT.isScalableVector()) |
14523 | 0 | return SDValue(); |
14524 | | |
14525 | 0 | if (!InVec.hasOneUse()) |
14526 | 0 | return SDValue(); |
14527 | | |
14528 | | // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt |
14529 | | // move the insert_vector_elts into the arms of the binop. Note that |
14530 | | // the new RHS must be a constant. |
14531 | 0 | const unsigned InVecOpcode = InVec->getOpcode(); |
14532 | 0 | if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) && |
14533 | 0 | InVal.hasOneUse()) { |
14534 | 0 | SDValue InVecLHS = InVec->getOperand(0); |
14535 | 0 | SDValue InVecRHS = InVec->getOperand(1); |
14536 | 0 | SDValue InValLHS = InVal->getOperand(0); |
14537 | 0 | SDValue InValRHS = InVal->getOperand(1); |
14538 | |
14539 | 0 | if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode())) |
14540 | 0 | return SDValue(); |
14541 | 0 | if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS)) |
14542 | 0 | return SDValue(); |
14543 | | // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may |
14544 | | // have different LHS and RHS types. |
14545 | 0 | if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType()) |
14546 | 0 | return SDValue(); |
14547 | 0 | SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, |
14548 | 0 | InVecLHS, InValLHS, EltNo); |
14549 | 0 | SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, |
14550 | 0 | InVecRHS, InValRHS, EltNo); |
14551 | 0 | return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS); |
14552 | 0 | } |
14553 | | |
14554 | | // Given insert_vector_elt (concat_vectors ...), InVal, Elt |
14555 | | // move the insert_vector_elt to the source operand of the concat_vector. |
14556 | 0 | if (InVec.getOpcode() != ISD::CONCAT_VECTORS) |
14557 | 0 | return SDValue(); |
14558 | | |
14559 | 0 | auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); |
14560 | 0 | if (!IndexC) |
14561 | 0 | return SDValue(); |
14562 | 0 | unsigned Elt = IndexC->getZExtValue(); |
14563 | |
14564 | 0 | EVT ConcatVT = InVec.getOperand(0).getValueType(); |
14565 | 0 | if (ConcatVT.getVectorElementType() != InVal.getValueType()) |
14566 | 0 | return SDValue(); |
14567 | 0 | unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); |
14568 | 0 | SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL, |
14569 | 0 | EltNo.getValueType()); |
14570 | |
14571 | 0 | unsigned ConcatOpIdx = Elt / ConcatNumElts; |
14572 | 0 | SDValue ConcatOp = InVec.getOperand(ConcatOpIdx); |
14573 | 0 | ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT, |
14574 | 0 | ConcatOp, InVal, NewIdx); |
14575 | |
14576 | 0 | SmallVector<SDValue> ConcatOps; |
14577 | 0 | ConcatOps.append(InVec->op_begin(), InVec->op_end()); |
14578 | 0 | ConcatOps[ConcatOpIdx] = ConcatOp; |
14579 | 0 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); |
14580 | 0 | } |
14581 | | |
14582 | | // If we're concatenating a series of vector loads like |
14583 | | // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ... |
14584 | | // then we can turn this into a strided load by widening the vector elements:
14585 | | // vlse32 p, stride=n
14586 | | static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, |
14587 | | const RISCVSubtarget &Subtarget, |
14588 | 0 | const RISCVTargetLowering &TLI) { |
14589 | 0 | SDLoc DL(N); |
14590 | 0 | EVT VT = N->getValueType(0); |
14591 | | |
14592 | | // Only perform this combine on legal MVTs. |
14593 | 0 | if (!TLI.isTypeLegal(VT)) |
14594 | 0 | return SDValue(); |
14595 | | |
14596 | | // TODO: Potentially extend this to scalable vectors |
14597 | 0 | if (VT.isScalableVector()) |
14598 | 0 | return SDValue(); |
14599 | | |
14600 | 0 | auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0)); |
14601 | 0 | if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) || |
14602 | 0 | !SDValue(BaseLd, 0).hasOneUse()) |
14603 | 0 | return SDValue(); |
14604 | | |
14605 | 0 | EVT BaseLdVT = BaseLd->getValueType(0); |
14606 | | |
14607 | | // Go through the loads and check that they're strided |
14608 | 0 | SmallVector<LoadSDNode *> Lds; |
14609 | 0 | Lds.push_back(BaseLd); |
14610 | 0 | Align Align = BaseLd->getAlign(); |
14611 | 0 | for (SDValue Op : N->ops().drop_front()) { |
14612 | 0 | auto *Ld = dyn_cast<LoadSDNode>(Op); |
14613 | 0 | if (!Ld || !Ld->isSimple() || !Op.hasOneUse() || |
14614 | 0 | Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) || |
14615 | 0 | Ld->getValueType(0) != BaseLdVT) |
14616 | 0 | return SDValue(); |
14617 | | |
14618 | 0 | Lds.push_back(Ld); |
14619 | | |
14620 | | // The common alignment is the most restrictive (smallest) of all the loads |
14621 | 0 | Align = std::min(Align, Ld->getAlign()); |
14622 | 0 | } |
14623 | | |
14624 | 0 | using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>; |
14625 | 0 | auto GetPtrDiff = [&DAG](LoadSDNode *Ld1, |
14626 | 0 | LoadSDNode *Ld2) -> std::optional<PtrDiff> { |
14627 | | // If the load ptrs can be decomposed into a common (Base + Index) with a |
14628 | | // common constant stride, then return the constant stride. |
14629 | 0 | BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG); |
14630 | 0 | BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG); |
14631 | 0 | if (BIO1.equalBaseIndex(BIO2, DAG)) |
14632 | 0 | return {{BIO2.getOffset() - BIO1.getOffset(), false}}; |
14633 | | |
14634 | | // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride) |
14635 | 0 | SDValue P1 = Ld1->getBasePtr(); |
14636 | 0 | SDValue P2 = Ld2->getBasePtr(); |
14637 | 0 | if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1) |
14638 | 0 | return {{P2.getOperand(1), false}}; |
14639 | 0 | if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2) |
14640 | 0 | return {{P1.getOperand(1), true}}; |
14641 | | |
14642 | 0 | return std::nullopt; |
14643 | 0 | }; |
14644 | | |
14645 | | // Get the distance between the first and second loads |
14646 | 0 | auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]); |
14647 | 0 | if (!BaseDiff) |
14648 | 0 | return SDValue(); |
14649 | | |
14650 | | // Check all the loads are the same distance apart |
14651 | 0 | for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++) |
14652 | 0 | if (GetPtrDiff(*It, *std::next(It)) != BaseDiff) |
14653 | 0 | return SDValue(); |
14654 | | |
14655 | | // TODO: At this point, we've successfully matched a generalized gather |
14656 | | // load. Maybe we should emit that, and then move the specialized |
14657 | | // matchers above and below into a DAG combine? |
14658 | | |
14659 | | // Get the widened scalar type, e.g. v4i8 -> i64 |
14660 | 0 | unsigned WideScalarBitWidth = |
14661 | 0 | BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements(); |
14662 | 0 | MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth); |
14663 | | |
14664 | | // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64 |
14665 | 0 | MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands()); |
14666 | 0 | if (!TLI.isTypeLegal(WideVecVT)) |
14667 | 0 | return SDValue(); |
14668 | | |
14669 | | // Check that the operation is legal |
14670 | 0 | if (!TLI.isLegalStridedLoadStore(WideVecVT, Align)) |
14671 | 0 | return SDValue(); |
14672 | | |
14673 | 0 | auto [StrideVariant, MustNegateStride] = *BaseDiff; |
14674 | 0 | SDValue Stride = std::holds_alternative<SDValue>(StrideVariant) |
14675 | 0 | ? std::get<SDValue>(StrideVariant) |
14676 | 0 | : DAG.getConstant(std::get<int64_t>(StrideVariant), DL, |
14677 | 0 | Lds[0]->getOffset().getValueType()); |
14678 | 0 | if (MustNegateStride) |
14679 | 0 | Stride = DAG.getNegative(Stride, DL, Stride.getValueType()); |
14680 | |
14681 | 0 | SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other}); |
14682 | 0 | SDValue IntID = |
14683 | 0 | DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, |
14684 | 0 | Subtarget.getXLenVT()); |
14685 | |
14686 | 0 | SDValue AllOneMask = |
14687 | 0 | DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL, |
14688 | 0 | DAG.getConstant(1, DL, MVT::i1)); |
14689 | |
14690 | 0 | SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT), |
14691 | 0 | BaseLd->getBasePtr(), Stride, AllOneMask}; |
14692 | |
14693 | 0 | uint64_t MemSize; |
14694 | 0 | if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride); |
14695 | 0 | ConstStride && ConstStride->getSExtValue() >= 0) |
14696 | | // total size = (elsize * n) + (stride - elsize) * (n-1) |
14697 | | // = elsize + stride * (n-1) |
14698 | 0 | MemSize = WideScalarVT.getSizeInBits() + |
14699 | 0 | ConstStride->getSExtValue() * (N->getNumOperands() - 1); |
14700 | 0 | else |
14701 | | // If Stride isn't constant, then we can't know how much it will load |
14702 | 0 | MemSize = MemoryLocation::UnknownSize; |
14703 | |
14704 | 0 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
14705 | 0 | BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize, |
14706 | 0 | Align); |
14707 | |
14708 | 0 | SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, |
14709 | 0 | Ops, WideVecVT, MMO); |
14710 | 0 | for (SDValue Ld : N->ops()) |
14711 | 0 | DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad); |
14712 | |
14713 | 0 | return DAG.getBitcast(VT.getSimpleVT(), StridedLoad); |
14714 | 0 | } |
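The MemSize computed above relies on the span formula spelled out in the comment: N elements of ElSize each, whose starts are Stride apart, touch ElSize + Stride*(N-1) in total. A tiny sketch of that arithmetic, working purely in bytes and using a hypothetical helper, not part of this file:

#include <cassert>
#include <cstdint>

// Span covered by N elements of ElSize bytes placed Stride bytes apart
// (non-negative stride).
static uint64_t stridedSpan(uint64_t ElSize, uint64_t Stride, uint64_t N) {
  return ElSize + Stride * (N - 1);
}

int main() {
  // Four 4-byte elements spaced 16 bytes apart read bytes [0,4), [16,20),
  // [32,36) and [48,52): a 52-byte span.
  assert(stridedSpan(4, 16, 4) == 52);
  // Contiguous case (stride == element size) degenerates to N * ElSize.
  assert(stridedSpan(4, 4, 4) == 16);
}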
14715 | | |
14716 | | static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, |
14717 | 0 | const RISCVSubtarget &Subtarget) { |
14718 | 0 | assert(N->getOpcode() == RISCVISD::ADD_VL); |
14719 | 0 | SDValue Addend = N->getOperand(0); |
14720 | 0 | SDValue MulOp = N->getOperand(1); |
14721 | 0 | SDValue AddMergeOp = N->getOperand(2); |
14722 | |
14723 | 0 | if (!AddMergeOp.isUndef()) |
14724 | 0 | return SDValue(); |
14725 | | |
14726 | 0 | auto IsVWMulOpc = [](unsigned Opc) { |
14727 | 0 | switch (Opc) { |
14728 | 0 | case RISCVISD::VWMUL_VL: |
14729 | 0 | case RISCVISD::VWMULU_VL: |
14730 | 0 | case RISCVISD::VWMULSU_VL: |
14731 | 0 | return true; |
14732 | 0 | default: |
14733 | 0 | return false; |
14734 | 0 | } |
14735 | 0 | }; |
14736 | |
14737 | 0 | if (!IsVWMulOpc(MulOp.getOpcode())) |
14738 | 0 | std::swap(Addend, MulOp); |
14739 | |
|
14740 | 0 | if (!IsVWMulOpc(MulOp.getOpcode())) |
14741 | 0 | return SDValue(); |
14742 | | |
14743 | 0 | SDValue MulMergeOp = MulOp.getOperand(2); |
14744 | |
14745 | 0 | if (!MulMergeOp.isUndef()) |
14746 | 0 | return SDValue(); |
14747 | | |
14748 | 0 | SDValue AddMask = N->getOperand(3); |
14749 | 0 | SDValue AddVL = N->getOperand(4); |
14750 | 0 | SDValue MulMask = MulOp.getOperand(3); |
14751 | 0 | SDValue MulVL = MulOp.getOperand(4); |
14752 | |
14753 | 0 | if (AddMask != MulMask || AddVL != MulVL) |
14754 | 0 | return SDValue(); |
14755 | | |
14756 | 0 | unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL; |
14757 | 0 | static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL, |
14758 | 0 | "Unexpected opcode after VWMACC_VL"); |
14759 | 0 | static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL, |
14760 | 0 | "Unexpected opcode after VWMACC_VL!"); |
14761 | 0 | static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL, |
14762 | 0 | "Unexpected opcode after VWMUL_VL!"); |
14763 | 0 | static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL, |
14764 | 0 | "Unexpected opcode after VWMUL_VL!"); |
14765 | |
14766 | 0 | SDLoc DL(N); |
14767 | 0 | EVT VT = N->getValueType(0); |
14768 | 0 | SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask, |
14769 | 0 | AddVL}; |
14770 | 0 | return DAG.getNode(Opc, DL, VT, Ops); |
14771 | 0 | } |
14772 | | |
14773 | | static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, |
14774 | | ISD::MemIndexType &IndexType, |
14775 | 0 | RISCVTargetLowering::DAGCombinerInfo &DCI) { |
14776 | 0 | if (!DCI.isBeforeLegalize()) |
14777 | 0 | return false; |
14778 | | |
14779 | 0 | SelectionDAG &DAG = DCI.DAG; |
14780 | 0 | const MVT XLenVT = |
14781 | 0 | DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT(); |
14782 | |
14783 | 0 | const EVT IndexVT = Index.getValueType(); |
14784 | | |
14785 | | // RISC-V indexed loads only support the "unsigned unscaled" addressing |
14786 | | // mode, so anything else must be manually legalized. |
14787 | 0 | if (!isIndexTypeSigned(IndexType)) |
14788 | 0 | return false; |
14789 | | |
14790 | 0 | if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { |
14791 | | // Any index legalization should first promote to XLenVT, so we don't lose |
14792 | | // bits when scaling. This may create an illegal index type so we let |
14793 | | // LLVM's legalization take care of the splitting. |
14794 | | // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. |
14795 | 0 | Index = DAG.getNode(ISD::SIGN_EXTEND, DL, |
14796 | 0 | IndexVT.changeVectorElementType(XLenVT), Index); |
14797 | 0 | } |
14798 | 0 | IndexType = ISD::UNSIGNED_SCALED; |
14799 | 0 | return true; |
14800 | 0 | } |
14801 | | |
14802 | | /// Match the index vector of a scatter or gather node as the shuffle mask |
14803 | | /// which performs the rearrangement if possible. Will only match if |
14804 | | /// all lanes are touched, and thus replacing the scatter or gather with |
14805 | | /// a unit strided access and shuffle is legal. |
14806 | | static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, |
14807 | 0 | SmallVector<int> &ShuffleMask) { |
14808 | 0 | if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) |
14809 | 0 | return false; |
14810 | 0 | if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) |
14811 | 0 | return false; |
14812 | | |
14813 | 0 | const unsigned ElementSize = VT.getScalarStoreSize(); |
14814 | 0 | const unsigned NumElems = VT.getVectorNumElements(); |
14815 | | |
14816 | | // Create the shuffle mask and check all bits active |
14817 | 0 | assert(ShuffleMask.empty()); |
14818 | 0 | BitVector ActiveLanes(NumElems); |
14819 | 0 | for (unsigned i = 0; i < Index->getNumOperands(); i++) { |
14820 | | // TODO: We've found an active bit of UB, and could be |
14821 | | // more aggressive here if desired. |
14822 | 0 | if (Index->getOperand(i)->isUndef()) |
14823 | 0 | return false; |
14824 | 0 | uint64_t C = Index->getConstantOperandVal(i); |
14825 | 0 | if (C % ElementSize != 0) |
14826 | 0 | return false; |
14827 | 0 | C = C / ElementSize; |
14828 | 0 | if (C >= NumElems) |
14829 | 0 | return false; |
14830 | 0 | ShuffleMask.push_back(C); |
14831 | 0 | ActiveLanes.set(C); |
14832 | 0 | } |
14833 | 0 | return ActiveLanes.all(); |
14834 | 0 | } |
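matchIndexAsShuffle above converts a constant byte-offset index vector into a shuffle mask when every offset is element-aligned, in range, and every destination lane gets hit, so the gather or scatter can become a unit-strided access plus a shuffle. A standalone sketch of that check with a hypothetical helper and a fixed element size:

#include <cassert>
#include <cstdint>
#include <vector>

// Builds the shuffle mask the way matchIndexAsShuffle does: offsets must be
// multiples of ElementSize, in range, and must cover every lane.
static bool indexToShuffleMask(const std::vector<uint64_t> &ByteOffsets,
                               unsigned ElementSize, std::vector<int> &Mask) {
  std::vector<bool> Active(ByteOffsets.size(), false);
  for (uint64_t C : ByteOffsets) {
    if (C % ElementSize != 0)
      return false;
    uint64_t Lane = C / ElementSize;
    if (Lane >= ByteOffsets.size())
      return false;
    Mask.push_back(static_cast<int>(Lane));
    Active[Lane] = true;
  }
  for (bool Hit : Active)
    if (!Hit)
      return false; // some lane untouched; keep the gather/scatter
  return true;
}

int main() {
  // i32 elements (4 bytes): byte offsets {12, 8, 4, 0} describe the reversal
  // shuffle {3, 2, 1, 0} over a unit-strided load.
  std::vector<uint64_t> Offsets = {12, 8, 4, 0};
  std::vector<int> Mask;
  assert(indexToShuffleMask(Offsets, 4, Mask));
  assert((Mask == std::vector<int>{3, 2, 1, 0}));
}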
14835 | | |
14836 | | /// Match the index of a gather or scatter operation as an operation |
14837 | | /// with twice the element width and half the number of elements. This is |
14838 | | /// generally profitable (if legal) because these operations are linear |
14839 | | /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
14840 | | /// come out ahead. |
14841 | | static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, |
14842 | 0 | Align BaseAlign, const RISCVSubtarget &ST) { |
14843 | 0 | if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode())) |
14844 | 0 | return false; |
14845 | 0 | if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode())) |
14846 | 0 | return false; |
14847 | | |
14848 | | // Attempt a doubling. If we can use an element type 4x or 8x in
14849 | | // size, this will happen via multiple iterations of the transform.
14850 | 0 | const unsigned NumElems = VT.getVectorNumElements(); |
14851 | 0 | if (NumElems % 2 != 0) |
14852 | 0 | return false; |
14853 | | |
14854 | 0 | const unsigned ElementSize = VT.getScalarStoreSize(); |
14855 | 0 | const unsigned WiderElementSize = ElementSize * 2; |
14856 | 0 | if (WiderElementSize > ST.getELen()/8) |
14857 | 0 | return false; |
14858 | | |
14859 | 0 | if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize) |
14860 | 0 | return false; |
14861 | | |
14862 | 0 | for (unsigned i = 0; i < Index->getNumOperands(); i++) { |
14863 | | // TODO: We've found an active bit of UB, and could be |
14864 | | // more aggressive here if desired. |
14865 | 0 | if (Index->getOperand(i)->isUndef()) |
14866 | 0 | return false; |
14867 | | // TODO: This offset check is too strict if we support fully |
14868 | | // misaligned memory operations. |
14869 | 0 | uint64_t C = Index->getConstantOperandVal(i); |
14870 | 0 | if (i % 2 == 0) { |
14871 | 0 | if (C % WiderElementSize != 0) |
14872 | 0 | return false; |
14873 | 0 | continue; |
14874 | 0 | } |
14875 | 0 | uint64_t Last = Index->getConstantOperandVal(i-1); |
14876 | 0 | if (C != Last + ElementSize) |
14877 | 0 | return false; |
14878 | 0 | } |
14879 | 0 | return true; |
14880 | 0 | } |
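matchIndexAsWiderOp above accepts an index vector when the lanes pair up as (C, C + ElementSize) with the even lane aligned for the doubled element width, so the same gather or scatter can run with half the lanes at twice the width. A small check of that pairing rule for i32 elements (ElementSize 4, WiderElementSize 8); the helper is a hypothetical sketch, not part of this file.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Pairing rule for ElementSize = 4: even lanes must be 8-byte-aligned offsets
// and each odd lane must equal the previous offset plus 4.
static bool pairsAsWiderOp(const std::vector<uint64_t> &ByteOffsets) {
  if (ByteOffsets.size() % 2 != 0)
    return false;
  for (std::size_t I = 0; I < ByteOffsets.size(); ++I) {
    if (I % 2 == 0) {
      if (ByteOffsets[I] % 8 != 0)
        return false;
    } else if (ByteOffsets[I] != ByteOffsets[I - 1] + 4) {
      return false;
    }
  }
  return true;
}

int main() {
  std::vector<uint64_t> Good = {0, 4, 16, 20, 40, 44}; // i64 access at {0, 16, 40}
  std::vector<uint64_t> Bad = {0, 4, 16, 24};          // second pair not adjacent
  assert(pairsAsWiderOp(Good));
  assert(!pairsAsWiderOp(Bad));
}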
14881 | | |
14882 | | |
14883 | | SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, |
14884 | 753k | DAGCombinerInfo &DCI) const { |
14885 | 753k | SelectionDAG &DAG = DCI.DAG; |
14886 | 753k | const MVT XLenVT = Subtarget.getXLenVT(); |
14887 | 753k | SDLoc DL(N); |
14888 | | |
14889 | | // Helper to call SimplifyDemandedBits on an operand of N where only some low |
14890 | | // bits are demanded. N will be added to the Worklist if it was not deleted. |
14891 | | // Caller should return SDValue(N, 0) if this returns true. |
14892 | 753k | auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) { |
14893 | 12.3k | SDValue Op = N->getOperand(OpNo); |
14894 | 12.3k | APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits); |
14895 | 12.3k | if (!SimplifyDemandedBits(Op, Mask, DCI)) |
14896 | 11.6k | return false; |
14897 | | |
14898 | 674 | if (N->getOpcode() != ISD::DELETED_NODE) |
14899 | 674 | DCI.AddToWorklist(N); |
14900 | 674 | return true; |
14901 | 12.3k | }; |
14902 | | |
14903 | 753k | switch (N->getOpcode()) { |
14904 | 216k | default: |
14905 | 216k | break; |
14906 | 216k | case RISCVISD::SplitF64: { |
14907 | 0 | SDValue Op0 = N->getOperand(0); |
14908 | | // If the input to SplitF64 is just BuildPairF64 then the operation is |
14909 | | // redundant. Instead, use BuildPairF64's operands directly. |
14910 | 0 | if (Op0->getOpcode() == RISCVISD::BuildPairF64) |
14911 | 0 | return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); |
14912 | | |
14913 | 0 | if (Op0->isUndef()) { |
14914 | 0 | SDValue Lo = DAG.getUNDEF(MVT::i32); |
14915 | 0 | SDValue Hi = DAG.getUNDEF(MVT::i32); |
14916 | 0 | return DCI.CombineTo(N, Lo, Hi); |
14917 | 0 | } |
14918 | | |
14919 | | // It's cheaper to materialise two 32-bit integers than to load a double |
14920 | | // from the constant pool and transfer it to integer registers through the |
14921 | | // stack. |
14922 | 0 | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { |
14923 | 0 | APInt V = C->getValueAPF().bitcastToAPInt(); |
14924 | 0 | SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); |
14925 | 0 | SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); |
14926 | 0 | return DCI.CombineTo(N, Lo, Hi); |
14927 | 0 | } |
14928 | | |
14929 | | // This is a target-specific version of a DAGCombine performed in |
14930 | | // DAGCombiner::visitBITCAST. It performs the equivalent of: |
14931 | | // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) |
14932 | | // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) |
14933 | 0 | if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || |
14934 | 0 | !Op0.getNode()->hasOneUse()) |
14935 | 0 | break; |
14936 | 0 | SDValue NewSplitF64 = |
14937 | 0 | DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), |
14938 | 0 | Op0.getOperand(0)); |
14939 | 0 | SDValue Lo = NewSplitF64.getValue(0); |
14940 | 0 | SDValue Hi = NewSplitF64.getValue(1); |
14941 | 0 | APInt SignBit = APInt::getSignMask(32); |
14942 | 0 | if (Op0.getOpcode() == ISD::FNEG) { |
14943 | 0 | SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, |
14944 | 0 | DAG.getConstant(SignBit, DL, MVT::i32)); |
14945 | 0 | return DCI.CombineTo(N, Lo, NewHi); |
14946 | 0 | } |
14947 | 0 | assert(Op0.getOpcode() == ISD::FABS); |
14948 | 0 | SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, |
14949 | 0 | DAG.getConstant(~SignBit, DL, MVT::i32)); |
14950 | 0 | return DCI.CombineTo(N, Lo, NewHi); |
14951 | 0 | } |
14952 | 1.81k | case RISCVISD::SLLW: |
14953 | 3.39k | case RISCVISD::SRAW: |
14954 | 6.20k | case RISCVISD::SRLW: |
14955 | 6.20k | case RISCVISD::RORW: |
14956 | 6.20k | case RISCVISD::ROLW: { |
14957 | | // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. |
14958 | 6.20k | if (SimplifyDemandedLowBitsHelper(0, 32) || |
14959 | 6.20k | SimplifyDemandedLowBitsHelper(1, 5)) |
14960 | 674 | return SDValue(N, 0); |
14961 | | |
14962 | 5.53k | break; |
14963 | 6.20k | } |
14964 | 5.53k | case RISCVISD::CLZW: |
14965 | 0 | case RISCVISD::CTZW: { |
14966 | | // Only the lower 32 bits of the first operand are read |
14967 | 0 | if (SimplifyDemandedLowBitsHelper(0, 32)) |
14968 | 0 | return SDValue(N, 0); |
14969 | 0 | break; |
14970 | 0 | } |
14971 | 0 | case RISCVISD::FMV_W_X_RV64: { |
14972 | | // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
14973 | | // conversion is unnecessary and can be replaced with the |
14974 | | // FMV_X_ANYEXTW_RV64 operand. |
14975 | 0 | SDValue Op0 = N->getOperand(0); |
14976 | 0 | if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64) |
14977 | 0 | return Op0.getOperand(0); |
14978 | 0 | break; |
14979 | 0 | } |
14980 | 0 | case RISCVISD::FMV_X_ANYEXTH: |
14981 | 0 | case RISCVISD::FMV_X_ANYEXTW_RV64: { |
14982 | 0 | SDLoc DL(N); |
14983 | 0 | SDValue Op0 = N->getOperand(0); |
14984 | 0 | MVT VT = N->getSimpleValueType(0); |
14985 | | // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the |
14986 | | // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 |
14987 | | // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. |
14988 | 0 | if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 && |
14989 | 0 | Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) || |
14990 | 0 | (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH && |
14991 | 0 | Op0->getOpcode() == RISCVISD::FMV_H_X)) { |
14992 | 0 | assert(Op0.getOperand(0).getValueType() == VT && |
14993 | 0 | "Unexpected value type!"); |
14994 | 0 | return Op0.getOperand(0); |
14995 | 0 | } |
14996 | | |
14997 | | // This is a target-specific version of a DAGCombine performed in |
14998 | | // DAGCombiner::visitBITCAST. It performs the equivalent of: |
14999 | | // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) |
15000 | | // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) |
15001 | 0 | if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || |
15002 | 0 | !Op0.getNode()->hasOneUse()) |
15003 | 0 | break; |
15004 | 0 | SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0)); |
15005 | 0 | unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16; |
15006 | 0 | APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits()); |
15007 | 0 | if (Op0.getOpcode() == ISD::FNEG) |
15008 | 0 | return DAG.getNode(ISD::XOR, DL, VT, NewFMV, |
15009 | 0 | DAG.getConstant(SignBit, DL, VT)); |
15010 | | |
15011 | 0 | assert(Op0.getOpcode() == ISD::FABS); |
15012 | 0 | return DAG.getNode(ISD::AND, DL, VT, NewFMV, |
15013 | 0 | DAG.getConstant(~SignBit, DL, VT)); |
15014 | 0 | } |
15015 | 80.5k | case ISD::ADD: |
15016 | 80.5k | return performADDCombine(N, DAG, Subtarget); |
15017 | 32.5k | case ISD::SUB: |
15018 | 32.5k | return performSUBCombine(N, DAG, Subtarget); |
15019 | 83.6k | case ISD::AND: |
15020 | 83.6k | return performANDCombine(N, DCI, Subtarget); |
15021 | 50.3k | case ISD::OR: |
15022 | 50.3k | return performORCombine(N, DCI, Subtarget); |
15023 | 82.9k | case ISD::XOR: |
15024 | 82.9k | return performXORCombine(N, DAG, Subtarget); |
15025 | 0 | case ISD::MUL: |
15026 | 0 | return performMULCombine(N, DAG); |
15027 | 0 | case ISD::FADD: |
15028 | 0 | case ISD::UMAX: |
15029 | 0 | case ISD::UMIN: |
15030 | 0 | case ISD::SMAX: |
15031 | 0 | case ISD::SMIN: |
15032 | 0 | case ISD::FMAXNUM: |
15033 | 0 | case ISD::FMINNUM: { |
15034 | 0 | if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) |
15035 | 0 | return V; |
15036 | 0 | if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) |
15037 | 0 | return V; |
15038 | 0 | return SDValue(); |
15039 | 0 | } |
15040 | 183k | case ISD::SETCC: |
15041 | 183k | return performSETCCCombine(N, DAG, Subtarget); |
15042 | 0 | case ISD::SIGN_EXTEND_INREG: |
15043 | 0 | return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); |
15044 | 0 | case ISD::ZERO_EXTEND: |
15045 | | // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during |
15046 | | // type legalization. This is safe because fp_to_uint produces poison if |
15047 | | // it overflows. |
15048 | 0 | if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { |
15049 | 0 | SDValue Src = N->getOperand(0); |
15050 | 0 | if (Src.getOpcode() == ISD::FP_TO_UINT && |
15051 | 0 | isTypeLegal(Src.getOperand(0).getValueType())) |
15052 | 0 | return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, |
15053 | 0 | Src.getOperand(0)); |
15054 | 0 | if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && |
15055 | 0 | isTypeLegal(Src.getOperand(1).getValueType())) { |
15056 | 0 | SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); |
15057 | 0 | SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, |
15058 | 0 | Src.getOperand(0), Src.getOperand(1)); |
15059 | 0 | DCI.CombineTo(N, Res); |
15060 | 0 | DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); |
15061 | 0 | DCI.recursivelyDeleteUnusedNodes(Src.getNode()); |
15062 | 0 | return SDValue(N, 0); // Return N so it doesn't get rechecked. |
15063 | 0 | } |
15064 | 0 | } |
15065 | 0 | return SDValue(); |
15066 | 0 | case RISCVISD::TRUNCATE_VECTOR_VL: { |
15067 | | // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1)) |
15068 | | // This is beneficial for the cases where X and Y are both the same
15069 | | // low-precision vector value type. Since the truncate would be lowered
15070 | | // into n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW
15071 | | // truncate restriction, such a pattern would be expanded into a series
15072 | | // of "vsetvli" and "vnsrl" instructions later to reach this point.
15073 | 0 | auto IsTruncNode = [](SDValue V) { |
15074 | 0 | if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) |
15075 | 0 | return false; |
15076 | 0 | SDValue VL = V.getOperand(2); |
15077 | 0 | auto *C = dyn_cast<ConstantSDNode>(VL); |
15078 | | // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand |
15079 | 0 | bool IsVLMAXForVMSET = (C && C->isAllOnes()) || |
15080 | 0 | (isa<RegisterSDNode>(VL) && |
15081 | 0 | cast<RegisterSDNode>(VL)->getReg() == RISCV::X0); |
15082 | 0 | return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL && |
15083 | 0 | IsVLMAXForVMSET; |
15084 | 0 | }; |
15085 | |
15086 | 0 | SDValue Op = N->getOperand(0); |
15087 | | |
15088 | | // We first need to find the innermost TRUNCATE_VECTOR_VL node
15089 | | // to distinguish such a pattern.
15090 | 0 | while (IsTruncNode(Op)) { |
15091 | 0 | if (!Op.hasOneUse()) |
15092 | 0 | return SDValue(); |
15093 | 0 | Op = Op.getOperand(0); |
15094 | 0 | } |
15095 | | |
15096 | 0 | if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) { |
15097 | 0 | SDValue N0 = Op.getOperand(0); |
15098 | 0 | SDValue N1 = Op.getOperand(1); |
15099 | 0 | if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && |
15100 | 0 | N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) { |
15101 | 0 | SDValue N00 = N0.getOperand(0); |
15102 | 0 | SDValue N10 = N1.getOperand(0); |
15103 | 0 | if (N00.getValueType().isVector() && |
15104 | 0 | N00.getValueType() == N10.getValueType() && |
15105 | 0 | N->getValueType(0) == N10.getValueType()) { |
15106 | 0 | unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; |
15107 | 0 | SDValue SMin = DAG.getNode( |
15108 | 0 | ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, |
15109 | 0 | DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); |
15110 | 0 | return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); |
15111 | 0 | } |
15112 | 0 | } |
15113 | 0 | } |
15114 | 0 | break; |
15115 | 0 | } |
15116 | 0 | case ISD::TRUNCATE: |
15117 | 0 | return performTRUNCATECombine(N, DAG, Subtarget); |
15118 | 2.36k | case ISD::SELECT: |
15119 | 2.36k | return performSELECTCombine(N, DAG, Subtarget); |
15120 | 0 | case RISCVISD::CZERO_EQZ: |
15121 | 0 | case RISCVISD::CZERO_NEZ: |
15122 | | // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1. |
15123 | | // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1. |
15124 | 0 | if (N->getOperand(1).getOpcode() == ISD::XOR && |
15125 | 0 | isOneConstant(N->getOperand(1).getOperand(1))) { |
15126 | 0 | SDValue Cond = N->getOperand(1).getOperand(0); |
15127 | 0 | APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1); |
15128 | 0 | if (DAG.MaskedValueIsZero(Cond, Mask)) { |
15129 | 0 | unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ |
15130 | 0 | ? RISCVISD::CZERO_NEZ |
15131 | 0 | : RISCVISD::CZERO_EQZ; |
15132 | 0 | return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), |
15133 | 0 | N->getOperand(0), Cond); |
15134 | 0 | } |
15135 | 0 | } |
15136 | 0 | return SDValue(); |
15137 | | |
15138 | 0 | case RISCVISD::SELECT_CC: { |
15139 | | // Transform |
15140 | 0 | SDValue LHS = N->getOperand(0); |
15141 | 0 | SDValue RHS = N->getOperand(1); |
15142 | 0 | SDValue CC = N->getOperand(2); |
15143 | 0 | ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); |
15144 | 0 | SDValue TrueV = N->getOperand(3); |
15145 | 0 | SDValue FalseV = N->getOperand(4); |
15146 | 0 | SDLoc DL(N); |
15147 | 0 | EVT VT = N->getValueType(0); |
15148 | | |
15149 | | // If the True and False values are the same, we don't need a select_cc. |
15150 | 0 | if (TrueV == FalseV) |
15151 | 0 | return TrueV; |
15152 | | |
15153 | | // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z |
15154 | | // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y |
15155 | 0 | if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) && |
15156 | 0 | isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) && |
15157 | 0 | (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) { |
15158 | 0 | if (CCVal == ISD::CondCode::SETGE) |
15159 | 0 | std::swap(TrueV, FalseV); |
15160 | |
15161 | 0 | int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue(); |
15162 | 0 | int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue(); |
15163 | | // Only handle simm12; if a constant is not in this range, it can be
15164 | | // considered as a register operand.
15165 | 0 | if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) && |
15166 | 0 | isInt<12>(TrueSImm - FalseSImm)) { |
15167 | 0 | SDValue SRA = |
15168 | 0 | DAG.getNode(ISD::SRA, DL, VT, LHS, |
15169 | 0 | DAG.getConstant(Subtarget.getXLen() - 1, DL, VT)); |
15170 | 0 | SDValue AND = |
15171 | 0 | DAG.getNode(ISD::AND, DL, VT, SRA, |
15172 | 0 | DAG.getConstant(TrueSImm - FalseSImm, DL, VT)); |
15173 | 0 | return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV); |
15174 | 0 | } |
15175 | | |
15176 | 0 | if (CCVal == ISD::CondCode::SETGE) |
15177 | 0 | std::swap(TrueV, FalseV); |
15178 | 0 | } |
15179 | | |
15180 | 0 | if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) |
15181 | 0 | return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), |
15182 | 0 | {LHS, RHS, CC, TrueV, FalseV}); |
15183 | | |
15184 | 0 | if (!Subtarget.hasConditionalMoveFusion()) { |
15185 | | // (select c, -1, y) -> -c | y |
15186 | 0 | if (isAllOnesConstant(TrueV)) { |
15187 | 0 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); |
15188 | 0 | SDValue Neg = DAG.getNegative(C, DL, VT); |
15189 | 0 | return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); |
15190 | 0 | } |
15191 | | // (select c, y, -1) -> -!c | y |
15192 | 0 | if (isAllOnesConstant(FalseV)) { |
15193 | 0 | SDValue C = |
15194 | 0 | DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); |
15195 | 0 | SDValue Neg = DAG.getNegative(C, DL, VT); |
15196 | 0 | return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); |
15197 | 0 | } |
15198 | | |
15199 | | // (select c, 0, y) -> -!c & y |
15200 | 0 | if (isNullConstant(TrueV)) { |
15201 | 0 | SDValue C = |
15202 | 0 | DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); |
15203 | 0 | SDValue Neg = DAG.getNegative(C, DL, VT); |
15204 | 0 | return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); |
15205 | 0 | } |
15206 | | // (select c, y, 0) -> -c & y |
15207 | 0 | if (isNullConstant(FalseV)) { |
15208 | 0 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); |
15209 | 0 | SDValue Neg = DAG.getNegative(C, DL, VT); |
15210 | 0 | return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); |
15211 | 0 | } |
15212 | | // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq)) |
15213 | | // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq)) |
15214 | 0 | if (((isOneConstant(FalseV) && LHS == TrueV && |
15215 | 0 | CCVal == ISD::CondCode::SETNE) || |
15216 | 0 | (isOneConstant(TrueV) && LHS == FalseV && |
15217 | 0 | CCVal == ISD::CondCode::SETEQ)) && |
15218 | 0 | isNullConstant(RHS)) { |
15219 | | // freeze it to be safe. |
15220 | 0 | LHS = DAG.getFreeze(LHS); |
15221 | 0 | SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ); |
15222 | 0 | return DAG.getNode(ISD::ADD, DL, VT, LHS, C); |
15223 | 0 | } |
15224 | 0 | } |
15225 | | |
15226 | | // If both true/false are an xor with 1, pull through the select. |
15227 | | // This can occur after op legalization if both operands are setccs that |
15228 | | // require an xor to invert. |
15229 | | // FIXME: Generalize to other binary ops with identical operand? |
15230 | 0 | if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR && |
15231 | 0 | TrueV.getOperand(1) == FalseV.getOperand(1) && |
15232 | 0 | isOneConstant(TrueV.getOperand(1)) && |
15233 | 0 | TrueV.hasOneUse() && FalseV.hasOneUse()) { |
15234 | 0 | SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC, |
15235 | 0 | TrueV.getOperand(0), FalseV.getOperand(0)); |
15236 | 0 | return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1)); |
15237 | 0 | } |
15238 | | |
15239 | 0 | return SDValue(); |
15240 | 0 | } |
15241 | 3.24k | case RISCVISD::BR_CC: { |
15242 | 3.24k | SDValue LHS = N->getOperand(1); |
15243 | 3.24k | SDValue RHS = N->getOperand(2); |
15244 | 3.24k | SDValue CC = N->getOperand(3); |
15245 | 3.24k | SDLoc DL(N); |
15246 | | |
15247 | 3.24k | if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) |
15248 | 4 | return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), |
15249 | 4 | N->getOperand(0), LHS, RHS, CC, N->getOperand(4)); |
15250 | | |
15251 | 3.24k | return SDValue(); |
15252 | 3.24k | } |
15253 | 0 | case ISD::BITREVERSE: |
15254 | 0 | return performBITREVERSECombine(N, DAG, Subtarget); |
15255 | 0 | case ISD::FP_TO_SINT: |
15256 | 0 | case ISD::FP_TO_UINT: |
15257 | 0 | return performFP_TO_INTCombine(N, DCI, Subtarget); |
15258 | 0 | case ISD::FP_TO_SINT_SAT: |
15259 | 0 | case ISD::FP_TO_UINT_SAT: |
15260 | 0 | return performFP_TO_INT_SATCombine(N, DCI, Subtarget); |
15261 | 0 | case ISD::FCOPYSIGN: { |
15262 | 0 | EVT VT = N->getValueType(0); |
15263 | 0 | if (!VT.isVector()) |
15264 | 0 | break; |
15265 | | // There is a form of VFSGNJ which injects the negated sign of its second |
15266 | | // operand. Try and bubble any FNEG up after the extend/round to produce |
15267 | | // this optimized pattern. Avoid modifying cases where FP_ROUND has
15268 | | // TRUNC=1.
15269 | 0 | SDValue In2 = N->getOperand(1); |
15270 | | // Avoid cases where the extend/round has multiple uses, as duplicating |
15271 | | // those is typically more expensive than removing a fneg. |
15272 | 0 | if (!In2.hasOneUse()) |
15273 | 0 | break; |
15274 | 0 | if (In2.getOpcode() != ISD::FP_EXTEND && |
15275 | 0 | (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) |
15276 | 0 | break; |
15277 | 0 | In2 = In2.getOperand(0); |
15278 | 0 | if (In2.getOpcode() != ISD::FNEG) |
15279 | 0 | break; |
15280 | 0 | SDLoc DL(N); |
15281 | 0 | SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); |
15282 | 0 | return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), |
15283 | 0 | DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); |
15284 | 0 | } |
15285 | 0 | case ISD::MGATHER: { |
15286 | 0 | const auto *MGN = dyn_cast<MaskedGatherSDNode>(N); |
15287 | 0 | const EVT VT = N->getValueType(0); |
15288 | 0 | SDValue Index = MGN->getIndex(); |
15289 | 0 | SDValue ScaleOp = MGN->getScale(); |
15290 | 0 | ISD::MemIndexType IndexType = MGN->getIndexType(); |
15291 | 0 | assert(!MGN->isIndexScaled() && |
15292 | 0 | "Scaled gather/scatter should not be formed"); |
15293 | | |
15294 | 0 | SDLoc DL(N); |
15295 | 0 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
15296 | 0 | return DAG.getMaskedGather( |
15297 | 0 | N->getVTList(), MGN->getMemoryVT(), DL, |
15298 | 0 | {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), |
15299 | 0 | MGN->getBasePtr(), Index, ScaleOp}, |
15300 | 0 | MGN->getMemOperand(), IndexType, MGN->getExtensionType()); |
15301 | | |
15302 | 0 | if (narrowIndex(Index, IndexType, DAG)) |
15303 | 0 | return DAG.getMaskedGather( |
15304 | 0 | N->getVTList(), MGN->getMemoryVT(), DL, |
15305 | 0 | {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), |
15306 | 0 | MGN->getBasePtr(), Index, ScaleOp}, |
15307 | 0 | MGN->getMemOperand(), IndexType, MGN->getExtensionType()); |
15308 | | |
15309 | 0 | if (Index.getOpcode() == ISD::BUILD_VECTOR && |
15310 | 0 | MGN->getExtensionType() == ISD::NON_EXTLOAD) { |
15311 | 0 | if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index); |
15312 | 0 | SimpleVID && SimpleVID->StepDenominator == 1) { |
15313 | 0 | const int64_t StepNumerator = SimpleVID->StepNumerator; |
15314 | 0 | const int64_t Addend = SimpleVID->Addend; |
15315 | | |
15316 | | // Note: We don't need to check alignment here since (by assumption |
15317 | | // from the existence of the gather), our offsets must be sufficiently
15318 | | // aligned. |
15319 | |
15320 | 0 | const EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
15321 | 0 | assert(MGN->getBasePtr()->getValueType(0) == PtrVT); |
15322 | 0 | assert(IndexType == ISD::UNSIGNED_SCALED); |
15323 | 0 | SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(), |
15324 | 0 | DAG.getConstant(Addend, DL, PtrVT)); |
15325 | |
15326 | 0 | SDVTList VTs = DAG.getVTList({VT, MVT::Other}); |
15327 | 0 | SDValue IntID = |
15328 | 0 | DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL, |
15329 | 0 | XLenVT); |
15330 | 0 | SDValue Ops[] = |
15331 | 0 | {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr, |
15332 | 0 | DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()}; |
15333 | 0 | return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, |
15334 | 0 | Ops, VT, MGN->getMemOperand()); |
15335 | 0 | } |
15336 | 0 | } |
15337 | | |
15338 | 0 | SmallVector<int> ShuffleMask; |
15339 | 0 | if (MGN->getExtensionType() == ISD::NON_EXTLOAD && |
15340 | 0 | matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) { |
15341 | 0 | SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(), |
15342 | 0 | MGN->getBasePtr(), DAG.getUNDEF(XLenVT), |
15343 | 0 | MGN->getMask(), DAG.getUNDEF(VT), |
15344 | 0 | MGN->getMemoryVT(), MGN->getMemOperand(), |
15345 | 0 | ISD::UNINDEXED, ISD::NON_EXTLOAD); |
15346 | 0 | SDValue Shuffle = |
15347 | 0 | DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask); |
15348 | 0 | return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL); |
15349 | 0 | } |
15350 | | |
15351 | 0 | if (MGN->getExtensionType() == ISD::NON_EXTLOAD && |
15352 | 0 | matchIndexAsWiderOp(VT, Index, MGN->getMask(), |
15353 | 0 | MGN->getMemOperand()->getBaseAlign(), Subtarget)) { |
15354 | 0 | SmallVector<SDValue> NewIndices; |
15355 | 0 | for (unsigned i = 0; i < Index->getNumOperands(); i += 2) |
15356 | 0 | NewIndices.push_back(Index.getOperand(i)); |
15357 | 0 | EVT IndexVT = Index.getValueType() |
15358 | 0 | .getHalfNumVectorElementsVT(*DAG.getContext()); |
15359 | 0 | Index = DAG.getBuildVector(IndexVT, DL, NewIndices); |
15360 | |
15361 | 0 | unsigned ElementSize = VT.getScalarStoreSize(); |
15362 | 0 | EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2); |
15363 | 0 | auto EltCnt = VT.getVectorElementCount(); |
15364 | 0 | assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); |
15365 | 0 | EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT, |
15366 | 0 | EltCnt.divideCoefficientBy(2)); |
15367 | 0 | SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru()); |
15368 | 0 | EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, |
15369 | 0 | EltCnt.divideCoefficientBy(2)); |
15370 | 0 | SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1)); |
15371 | |
15372 | 0 | SDValue Gather = |
15373 | 0 | DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL, |
15374 | 0 | {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(), |
15375 | 0 | Index, ScaleOp}, |
15376 | 0 | MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD); |
15377 | 0 | SDValue Result = DAG.getBitcast(VT, Gather.getValue(0)); |
15378 | 0 | return DAG.getMergeValues({Result, Gather.getValue(1)}, DL); |
15379 | 0 | } |
15380 | 0 | break; |
15381 | 0 | } |
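
The MGATHER case above rewrites a gather whose index vector is an affine sequence Addend + i*Step (with StepDenominator == 1) into a strided load from BasePtr + Addend with stride Step. A minimal standalone sketch of that index analysis, using plain integers; deriveStride is a hypothetical helper, not an LLVM API.

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

struct Strided { int64_t Addend; int64_t Step; };

// Return {Addend, Step} if Idx[i] == Addend + i * Step for all i.
static std::optional<Strided> deriveStride(const std::vector<int64_t> &Idx) {
  if (Idx.size() < 2)
    return std::nullopt;
  int64_t Addend = Idx[0], Step = Idx[1] - Idx[0];
  for (size_t i = 0; i < Idx.size(); ++i)
    if (Idx[i] != Addend + (int64_t)i * Step)
      return std::nullopt;
  return Strided{Addend, Step};
}

int main() {
  // Byte offsets {8, 24, 40, 56}: the gather can become a strided load from
  // Base + 8 with a stride of 16 bytes.
  auto S = deriveStride({8, 24, 40, 56});
  assert(S && S->Addend == 8 && S->Step == 16);
  // Non-affine offsets cannot be turned into a strided access.
  assert(!deriveStride({0, 4, 12, 16}));
  return 0;
}
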
15382 | 0 | case ISD::MSCATTER: {
15383 | 0 | const auto *MSN = dyn_cast<MaskedScatterSDNode>(N); |
15384 | 0 | SDValue Index = MSN->getIndex(); |
15385 | 0 | SDValue ScaleOp = MSN->getScale(); |
15386 | 0 | ISD::MemIndexType IndexType = MSN->getIndexType(); |
15387 | 0 | assert(!MSN->isIndexScaled() && |
15388 | 0 | "Scaled gather/scatter should not be formed"); |
15389 | | |
15390 | 0 | SDLoc DL(N); |
15391 | 0 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
15392 | 0 | return DAG.getMaskedScatter( |
15393 | 0 | N->getVTList(), MSN->getMemoryVT(), DL, |
15394 | 0 | {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), |
15395 | 0 | Index, ScaleOp}, |
15396 | 0 | MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); |
15397 | | |
15398 | 0 | if (narrowIndex(Index, IndexType, DAG)) |
15399 | 0 | return DAG.getMaskedScatter( |
15400 | 0 | N->getVTList(), MSN->getMemoryVT(), DL, |
15401 | 0 | {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), |
15402 | 0 | Index, ScaleOp}, |
15403 | 0 | MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); |
15404 | | |
15405 | 0 | EVT VT = MSN->getValue()->getValueType(0); |
15406 | 0 | SmallVector<int> ShuffleMask; |
15407 | 0 | if (!MSN->isTruncatingStore() && |
15408 | 0 | matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) { |
15409 | 0 | SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(), |
15410 | 0 | DAG.getUNDEF(VT), ShuffleMask); |
15411 | 0 | return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(), |
15412 | 0 | DAG.getUNDEF(XLenVT), MSN->getMask(), |
15413 | 0 | MSN->getMemoryVT(), MSN->getMemOperand(), |
15414 | 0 | ISD::UNINDEXED, false); |
15415 | 0 | } |
15416 | 0 | break; |
15417 | 0 | } |
15418 | 0 | case ISD::VP_GATHER: { |
15419 | 0 | const auto *VPGN = dyn_cast<VPGatherSDNode>(N); |
15420 | 0 | SDValue Index = VPGN->getIndex(); |
15421 | 0 | SDValue ScaleOp = VPGN->getScale(); |
15422 | 0 | ISD::MemIndexType IndexType = VPGN->getIndexType(); |
15423 | 0 | assert(!VPGN->isIndexScaled() && |
15424 | 0 | "Scaled gather/scatter should not be formed"); |
15425 | | |
15426 | 0 | SDLoc DL(N); |
15427 | 0 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
15428 | 0 | return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, |
15429 | 0 | {VPGN->getChain(), VPGN->getBasePtr(), Index, |
15430 | 0 | ScaleOp, VPGN->getMask(), |
15431 | 0 | VPGN->getVectorLength()}, |
15432 | 0 | VPGN->getMemOperand(), IndexType); |
15433 | | |
15434 | 0 | if (narrowIndex(Index, IndexType, DAG)) |
15435 | 0 | return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, |
15436 | 0 | {VPGN->getChain(), VPGN->getBasePtr(), Index, |
15437 | 0 | ScaleOp, VPGN->getMask(), |
15438 | 0 | VPGN->getVectorLength()}, |
15439 | 0 | VPGN->getMemOperand(), IndexType); |
15440 | | |
15441 | 0 | break; |
15442 | 0 | } |
15443 | 0 | case ISD::VP_SCATTER: { |
15444 | 0 | const auto *VPSN = dyn_cast<VPScatterSDNode>(N); |
15445 | 0 | SDValue Index = VPSN->getIndex(); |
15446 | 0 | SDValue ScaleOp = VPSN->getScale(); |
15447 | 0 | ISD::MemIndexType IndexType = VPSN->getIndexType(); |
15448 | 0 | assert(!VPSN->isIndexScaled() && |
15449 | 0 | "Scaled gather/scatter should not be formed"); |
15450 | | |
15451 | 0 | SDLoc DL(N); |
15452 | 0 | if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI)) |
15453 | 0 | return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, |
15454 | 0 | {VPSN->getChain(), VPSN->getValue(), |
15455 | 0 | VPSN->getBasePtr(), Index, ScaleOp, |
15456 | 0 | VPSN->getMask(), VPSN->getVectorLength()}, |
15457 | 0 | VPSN->getMemOperand(), IndexType); |
15458 | | |
15459 | 0 | if (narrowIndex(Index, IndexType, DAG)) |
15460 | 0 | return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, |
15461 | 0 | {VPSN->getChain(), VPSN->getValue(), |
15462 | 0 | VPSN->getBasePtr(), Index, ScaleOp, |
15463 | 0 | VPSN->getMask(), VPSN->getVectorLength()}, |
15464 | 0 | VPSN->getMemOperand(), IndexType); |
15465 | 0 | break; |
15466 | 0 | } |
15467 | 0 | case RISCVISD::SRA_VL: |
15468 | 0 | case RISCVISD::SRL_VL: |
15469 | 0 | case RISCVISD::SHL_VL: { |
15470 | 0 | SDValue ShAmt = N->getOperand(1); |
15471 | 0 | if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { |
15472 | | // We don't need the upper 32 bits of a 64-bit element for a shift amount. |
15473 | 0 | SDLoc DL(N); |
15474 | 0 | SDValue VL = N->getOperand(4); |
15475 | 0 | EVT VT = N->getValueType(0); |
15476 | 0 | ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), |
15477 | 0 | ShAmt.getOperand(1), VL); |
15478 | 0 | return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, |
15479 | 0 | N->getOperand(2), N->getOperand(3), N->getOperand(4)); |
15480 | 0 | } |
15481 | 0 | break; |
15482 | 0 | } |
15483 | 11.5k | case ISD::SRA: |
15484 | 11.5k | if (SDValue V = performSRACombine(N, DAG, Subtarget)) |
15485 | 0 | return V; |
15486 | 11.5k | [[fallthrough]]; |
15487 | 11.5k | case ISD::SRL: |
15488 | 11.5k | case ISD::SHL: { |
15489 | 11.5k | SDValue ShAmt = N->getOperand(1); |
15490 | 11.5k | if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { |
15491 | | // We don't need the upper 32 bits of a 64-bit element for a shift amount. |
15492 | 0 | SDLoc DL(N); |
15493 | 0 | EVT VT = N->getValueType(0); |
15494 | 0 | ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), |
15495 | 0 | ShAmt.getOperand(1), |
15496 | 0 | DAG.getRegister(RISCV::X0, Subtarget.getXLenVT())); |
15497 | 0 | return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); |
15498 | 0 | } |
15499 | 11.5k | break; |
15500 | 11.5k | } |
15501 | 11.5k | case RISCVISD::ADD_VL: |
15502 | 0 | if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) |
15503 | 0 | return V; |
15504 | 0 | return combineToVWMACC(N, DAG, Subtarget); |
15505 | 0 | case RISCVISD::SUB_VL: |
15506 | 0 | case RISCVISD::VWADD_W_VL: |
15507 | 0 | case RISCVISD::VWADDU_W_VL: |
15508 | 0 | case RISCVISD::VWSUB_W_VL: |
15509 | 0 | case RISCVISD::VWSUBU_W_VL: |
15510 | 0 | case RISCVISD::MUL_VL: |
15511 | 0 | return combineBinOp_VLToVWBinOp_VL(N, DCI); |
15512 | 0 | case RISCVISD::VFMADD_VL: |
15513 | 0 | case RISCVISD::VFNMADD_VL: |
15514 | 0 | case RISCVISD::VFMSUB_VL: |
15515 | 0 | case RISCVISD::VFNMSUB_VL: |
15516 | 0 | case RISCVISD::STRICT_VFMADD_VL: |
15517 | 0 | case RISCVISD::STRICT_VFNMADD_VL: |
15518 | 0 | case RISCVISD::STRICT_VFMSUB_VL: |
15519 | 0 | case RISCVISD::STRICT_VFNMSUB_VL: |
15520 | 0 | return performVFMADD_VLCombine(N, DAG, Subtarget); |
15521 | 0 | case RISCVISD::FMUL_VL: |
15522 | 0 | return performVFMUL_VLCombine(N, DAG, Subtarget); |
15523 | 0 | case RISCVISD::FADD_VL: |
15524 | 0 | case RISCVISD::FSUB_VL: |
15525 | 0 | return performFADDSUB_VLCombine(N, DAG, Subtarget); |
15526 | 0 | case ISD::LOAD: |
15527 | 0 | case ISD::STORE: { |
15528 | 0 | if (DCI.isAfterLegalizeDAG()) |
15529 | 0 | if (SDValue V = performMemPairCombine(N, DCI)) |
15530 | 0 | return V; |
15531 | | |
15532 | 0 | if (N->getOpcode() != ISD::STORE) |
15533 | 0 | break; |
15534 | | |
15535 | 0 | auto *Store = cast<StoreSDNode>(N); |
15536 | 0 | SDValue Chain = Store->getChain(); |
15537 | 0 | EVT MemVT = Store->getMemoryVT(); |
15538 | 0 | SDValue Val = Store->getValue(); |
15539 | 0 | SDLoc DL(N); |
15540 | |
15541 | 0 | bool IsScalarizable = |
15542 | 0 | MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) && |
15543 | 0 | Store->isSimple() && |
15544 | 0 | MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) && |
15545 | 0 | isPowerOf2_64(MemVT.getSizeInBits()) && |
15546 | 0 | MemVT.getSizeInBits() <= Subtarget.getXLen(); |
15547 | | |
15548 | | // If sufficiently aligned we can scalarize stores of constant vectors of |
15549 | | // any power-of-two size up to XLen bits, provided that they aren't too |
15550 | | // expensive to materialize. |
15551 | | // vsetivli zero, 2, e8, m1, ta, ma |
15552 | | // vmv.v.i v8, 4 |
15553 | | // vse64.v v8, (a0) |
15554 | | // -> |
15555 | | // li a1, 1028 |
15556 | | // sh a1, 0(a0) |
15557 | 0 | if (DCI.isBeforeLegalize() && IsScalarizable && |
15558 | 0 | ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) { |
15559 | | // Get the constant vector bits |
15560 | 0 | APInt NewC(Val.getValueSizeInBits(), 0); |
15561 | 0 | uint64_t EltSize = Val.getScalarValueSizeInBits(); |
15562 | 0 | for (unsigned i = 0; i < Val.getNumOperands(); i++) { |
15563 | 0 | if (Val.getOperand(i).isUndef()) |
15564 | 0 | continue; |
15565 | 0 | NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize), |
15566 | 0 | i * EltSize); |
15567 | 0 | } |
15568 | 0 | MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); |
15569 | |
15570 | 0 | if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget, |
15571 | 0 | true) <= 2 && |
15572 | 0 | allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
15573 | 0 | NewVT, *Store->getMemOperand())) { |
15574 | 0 | SDValue NewV = DAG.getConstant(NewC, DL, NewVT); |
15575 | 0 | return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(), |
15576 | 0 | Store->getPointerInfo(), Store->getOriginalAlign(), |
15577 | 0 | Store->getMemOperand()->getFlags()); |
15578 | 0 | } |
15579 | 0 | } |
15580 | | |
15581 | | // Similarly, if sufficiently aligned we can scalarize vector copies, e.g. |
15582 | | // vsetivli zero, 2, e16, m1, ta, ma |
15583 | | // vle16.v v8, (a0) |
15584 | | // vse16.v v8, (a1) |
15585 | 0 | if (auto *L = dyn_cast<LoadSDNode>(Val); |
15586 | 0 | L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() && |
15587 | 0 | L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) && |
15588 | 0 | Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) && |
15589 | 0 | L->getMemoryVT() == MemVT) { |
15590 | 0 | MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); |
15591 | 0 | if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
15592 | 0 | NewVT, *Store->getMemOperand()) && |
15593 | 0 | allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), |
15594 | 0 | NewVT, *L->getMemOperand())) { |
15595 | 0 | SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(), |
15596 | 0 | L->getPointerInfo(), L->getOriginalAlign(), |
15597 | 0 | L->getMemOperand()->getFlags()); |
15598 | 0 | return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(), |
15599 | 0 | Store->getPointerInfo(), Store->getOriginalAlign(), |
15600 | 0 | Store->getMemOperand()->getFlags()); |
15601 | 0 | } |
15602 | 0 | } |
15603 | | |
15604 | | // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. |
15605 | | // vfmv.f.s is represented as extract element from 0. Match it late to avoid |
15606 | | // any illegal types. |
15607 | 0 | if (Val.getOpcode() == RISCVISD::VMV_X_S || |
15608 | 0 | (DCI.isAfterLegalizeDAG() && |
15609 | 0 | Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
15610 | 0 | isNullConstant(Val.getOperand(1)))) { |
15611 | 0 | SDValue Src = Val.getOperand(0); |
15612 | 0 | MVT VecVT = Src.getSimpleValueType(); |
15613 | | // VecVT should be scalable and memory VT should match the element type. |
15614 | 0 | if (!Store->isIndexed() && VecVT.isScalableVector() && |
15615 | 0 | MemVT == VecVT.getVectorElementType()) { |
15616 | 0 | SDLoc DL(N); |
15617 | 0 | MVT MaskVT = getMaskTypeFor(VecVT); |
15618 | 0 | return DAG.getStoreVP( |
15619 | 0 | Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(), |
15620 | 0 | DAG.getConstant(1, DL, MaskVT), |
15621 | 0 | DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT, |
15622 | 0 | Store->getMemOperand(), Store->getAddressingMode(), |
15623 | 0 | Store->isTruncatingStore(), /*IsCompress*/ false); |
15624 | 0 | } |
15625 | 0 | } |
15626 | | |
15627 | 0 | break; |
15628 | 0 | } |
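
The constant-vector store scalarization above packs the element constants into one integer, element 0 in the lowest bits (cf. the vmv.v.i 4 / "li a1, 1028; sh" example in the comment). A small sketch of that packing; packElements is a stand-in for the NewC.insertBits loop, not an LLVM API.

#include <cassert>
#include <cstdint>
#include <vector>

// Pack fixed-width vector elements into a single integer, element 0 in the
// lowest bits.
static uint64_t packElements(const std::vector<uint64_t> &Elts,
                             unsigned EltBits) {
  uint64_t Packed = 0;
  for (size_t i = 0; i < Elts.size(); ++i)
    Packed |= (Elts[i] & ((1ULL << EltBits) - 1)) << (i * EltBits);
  return Packed;
}

int main() {
  // <2 x i8> <4, 4> packs to 0x0404 == 1028, so when alignment allows it the
  // vector store can be replaced by "li a1, 1028; sh a1, 0(a0)".
  assert(packElements({4, 4}, 8) == 1028);
  return 0;
}
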
15629 | 0 | case ISD::SPLAT_VECTOR: { |
15630 | 0 | EVT VT = N->getValueType(0); |
15631 | | // Only perform this combine on legal MVT types. |
15632 | 0 | if (!isTypeLegal(VT)) |
15633 | 0 | break; |
15634 | 0 | if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, |
15635 | 0 | DAG, Subtarget)) |
15636 | 0 | return Gather; |
15637 | 0 | break; |
15638 | 0 | } |
15639 | 0 | case ISD::BUILD_VECTOR: |
15640 | 0 | if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this)) |
15641 | 0 | return V; |
15642 | 0 | break; |
15643 | 0 | case ISD::CONCAT_VECTORS: |
15644 | 0 | if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) |
15645 | 0 | return V; |
15646 | 0 | break; |
15647 | 0 | case ISD::INSERT_VECTOR_ELT: |
15648 | 0 | if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this)) |
15649 | 0 | return V; |
15650 | 0 | break; |
15651 | 0 | case RISCVISD::VFMV_V_F_VL: { |
15652 | 0 | const MVT VT = N->getSimpleValueType(0); |
15653 | 0 | SDValue Passthru = N->getOperand(0); |
15654 | 0 | SDValue Scalar = N->getOperand(1); |
15655 | 0 | SDValue VL = N->getOperand(2); |
15656 | | |
15657 | | // If VL is 1, we can use vfmv.s.f. |
15658 | 0 | if (isOneConstant(VL)) |
15659 | 0 | return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL); |
15660 | 0 | break; |
15661 | 0 | } |
15662 | 0 | case RISCVISD::VMV_V_X_VL: { |
15663 | 0 | const MVT VT = N->getSimpleValueType(0); |
15664 | 0 | SDValue Passthru = N->getOperand(0); |
15665 | 0 | SDValue Scalar = N->getOperand(1); |
15666 | 0 | SDValue VL = N->getOperand(2); |
15667 | | |
15668 | | // Tail agnostic VMV.V.X only demands the vector element bitwidth from the |
15669 | | // scalar input. |
15670 | 0 | unsigned ScalarSize = Scalar.getValueSizeInBits(); |
15671 | 0 | unsigned EltWidth = VT.getScalarSizeInBits(); |
15672 | 0 | if (ScalarSize > EltWidth && Passthru.isUndef()) |
15673 | 0 | if (SimplifyDemandedLowBitsHelper(1, EltWidth)) |
15674 | 0 | return SDValue(N, 0); |
15675 | | |
15676 | | // If VL is 1 and the scalar value won't benefit from immediate, we can |
15677 | | // use vmv.s.x. |
15678 | 0 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); |
15679 | 0 | if (isOneConstant(VL) && |
15680 | 0 | (!Const || Const->isZero() || |
15681 | 0 | !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5))) |
15682 | 0 | return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL); |
15683 | | |
15684 | 0 | break; |
15685 | 0 | } |
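
The VL==1 path above keeps VMV_V_X only when the scalar is a nonzero constant that fits a signed 5-bit immediate, so it can later become vmv.v.i; everything else is rewritten to vmv.s.x. A sketch of that immediate check with a hypothetical helper name, using plain C++ instead of APInt::isSignedIntN.

#include <cassert>
#include <cstdint>

// True if a VL=1 splat of C is better kept as vmv.v.x (eventually vmv.v.i)
// rather than being rewritten to vmv.s.x: C must be nonzero and fit simm5.
static bool keepAsVMV_V_I(int64_t C) {
  return C != 0 && C >= -16 && C <= 15;
}

int main() {
  assert(keepAsVMV_V_I(7));    // can become vmv.v.i v8, 7
  assert(!keepAsVMV_V_I(42));  // out of simm5 range -> vmv.s.x
  assert(!keepAsVMV_V_I(0));   // zero is cheap either way -> vmv.s.x
  return 0;
}
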
15686 | 0 | case RISCVISD::VFMV_S_F_VL: { |
15687 | 0 | SDValue Src = N->getOperand(1); |
15688 | | // Try to remove vector->scalar->vector if the scalar->vector is inserting |
15689 | | // into an undef vector. |
15690 | | // TODO: Could use a vslide or vmv.v.v for non-undef. |
15691 | 0 | if (N->getOperand(0).isUndef() && |
15692 | 0 | Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
15693 | 0 | isNullConstant(Src.getOperand(1)) && |
15694 | 0 | Src.getOperand(0).getValueType().isScalableVector()) { |
15695 | 0 | EVT VT = N->getValueType(0); |
15696 | 0 | EVT SrcVT = Src.getOperand(0).getValueType(); |
15697 | 0 | assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); |
15698 | | // Widths match, just return the original vector. |
15699 | 0 | if (SrcVT == VT) |
15700 | 0 | return Src.getOperand(0); |
15701 | | // TODO: Use insert_subvector/extract_subvector to change widen/narrow? |
15702 | 0 | } |
15703 | 0 | [[fallthrough]]; |
15704 | 0 | } |
15705 | 0 | case RISCVISD::VMV_S_X_VL: { |
15706 | 0 | const MVT VT = N->getSimpleValueType(0); |
15707 | 0 | SDValue Passthru = N->getOperand(0); |
15708 | 0 | SDValue Scalar = N->getOperand(1); |
15709 | 0 | SDValue VL = N->getOperand(2); |
15710 | | |
15711 | | // Use M1 or smaller to avoid over-constraining register allocation.
15712 | 0 | const MVT M1VT = getLMUL1VT(VT); |
15713 | 0 | if (M1VT.bitsLT(VT)) { |
15714 | 0 | SDValue M1Passthru = |
15715 | 0 | DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru, |
15716 | 0 | DAG.getVectorIdxConstant(0, DL)); |
15717 | 0 | SDValue Result = |
15718 | 0 | DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL); |
15719 | 0 | Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result, |
15720 | 0 | DAG.getConstant(0, DL, XLenVT)); |
15721 | 0 | return Result; |
15722 | 0 | } |
15723 | | |
15724 | | // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or |
15725 | | // higher would involve overly constraining the register allocator for |
15726 | | // no purpose. |
15727 | 0 | if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar); |
15728 | 0 | Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) && |
15729 | 0 | VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef()) |
15730 | 0 | return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL); |
15731 | | |
15732 | 0 | break; |
15733 | 0 | } |
15734 | 0 | case RISCVISD::VMV_X_S: { |
15735 | 0 | SDValue Vec = N->getOperand(0); |
15736 | 0 | MVT VecVT = N->getOperand(0).getSimpleValueType(); |
15737 | 0 | const MVT M1VT = getLMUL1VT(VecVT); |
15738 | 0 | if (M1VT.bitsLT(VecVT)) { |
15739 | 0 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, |
15740 | 0 | DAG.getVectorIdxConstant(0, DL)); |
15741 | 0 | return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec); |
15742 | 0 | } |
15743 | 0 | break; |
15744 | 0 | } |
15745 | 0 | case ISD::INTRINSIC_VOID: |
15746 | 0 | case ISD::INTRINSIC_W_CHAIN: |
15747 | 0 | case ISD::INTRINSIC_WO_CHAIN: { |
15748 | 0 | unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; |
15749 | 0 | unsigned IntNo = N->getConstantOperandVal(IntOpNo); |
15750 | 0 | switch (IntNo) { |
15751 | | // By default we do not combine any intrinsic. |
15752 | 0 | default: |
15753 | 0 | return SDValue(); |
15754 | 0 | case Intrinsic::riscv_masked_strided_load: { |
15755 | 0 | MVT VT = N->getSimpleValueType(0); |
15756 | 0 | auto *Load = cast<MemIntrinsicSDNode>(N); |
15757 | 0 | SDValue PassThru = N->getOperand(2); |
15758 | 0 | SDValue Base = N->getOperand(3); |
15759 | 0 | SDValue Stride = N->getOperand(4); |
15760 | 0 | SDValue Mask = N->getOperand(5); |
15761 | | |
15762 | | // If the stride is equal to the element size in bytes, we can use |
15763 | | // a masked.load. |
15764 | 0 | const unsigned ElementSize = VT.getScalarStoreSize(); |
15765 | 0 | if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); |
15766 | 0 | StrideC && StrideC->getZExtValue() == ElementSize) |
15767 | 0 | return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base, |
15768 | 0 | DAG.getUNDEF(XLenVT), Mask, PassThru, |
15769 | 0 | Load->getMemoryVT(), Load->getMemOperand(), |
15770 | 0 | ISD::UNINDEXED, ISD::NON_EXTLOAD); |
15771 | 0 | return SDValue(); |
15772 | 0 | } |
15773 | 0 | case Intrinsic::riscv_masked_strided_store: { |
15774 | 0 | auto *Store = cast<MemIntrinsicSDNode>(N); |
15775 | 0 | SDValue Value = N->getOperand(2); |
15776 | 0 | SDValue Base = N->getOperand(3); |
15777 | 0 | SDValue Stride = N->getOperand(4); |
15778 | 0 | SDValue Mask = N->getOperand(5); |
15779 | | |
15780 | | // If the stride is equal to the element size in bytes, we can use |
15781 | | // a masked.store. |
15782 | 0 | const unsigned ElementSize = Value.getValueType().getScalarStoreSize(); |
15783 | 0 | if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride); |
15784 | 0 | StrideC && StrideC->getZExtValue() == ElementSize) |
15785 | 0 | return DAG.getMaskedStore(Store->getChain(), DL, Value, Base, |
15786 | 0 | DAG.getUNDEF(XLenVT), Mask, |
15787 | 0 | Store->getMemoryVT(), Store->getMemOperand(), |
15788 | 0 | ISD::UNINDEXED, false); |
15789 | 0 | return SDValue(); |
15790 | 0 | } |
15791 | 0 | case Intrinsic::riscv_vcpop: |
15792 | 0 | case Intrinsic::riscv_vcpop_mask: |
15793 | 0 | case Intrinsic::riscv_vfirst: |
15794 | 0 | case Intrinsic::riscv_vfirst_mask: { |
15795 | 0 | SDValue VL = N->getOperand(2); |
15796 | 0 | if (IntNo == Intrinsic::riscv_vcpop_mask || |
15797 | 0 | IntNo == Intrinsic::riscv_vfirst_mask) |
15798 | 0 | VL = N->getOperand(3); |
15799 | 0 | if (!isNullConstant(VL)) |
15800 | 0 | return SDValue(); |
15801 | | // If VL is 0, vcpop -> li 0, vfirst -> li -1. |
15802 | 0 | SDLoc DL(N); |
15803 | 0 | EVT VT = N->getValueType(0); |
15804 | 0 | if (IntNo == Intrinsic::riscv_vfirst || |
15805 | 0 | IntNo == Intrinsic::riscv_vfirst_mask) |
15806 | 0 | return DAG.getConstant(-1, DL, VT); |
15807 | 0 | return DAG.getConstant(0, DL, VT); |
15808 | 0 | } |
15809 | 0 | } |
15810 | 0 | } |
15811 | 0 | case ISD::BITCAST: { |
15812 | 0 | assert(Subtarget.useRVVForFixedLengthVectors()); |
15813 | 0 | SDValue N0 = N->getOperand(0); |
15814 | 0 | EVT VT = N->getValueType(0); |
15815 | 0 | EVT SrcVT = N0.getValueType(); |
15816 | | // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer |
15817 | | // type, widen both sides to avoid a trip through memory. |
15818 | 0 | if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && |
15819 | 0 | VT.isScalarInteger()) { |
15820 | 0 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
15821 | 0 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); |
15822 | 0 | Ops[0] = N0; |
15823 | 0 | SDLoc DL(N); |
15824 | 0 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops); |
15825 | 0 | N0 = DAG.getBitcast(MVT::i8, N0); |
15826 | 0 | return DAG.getNode(ISD::TRUNCATE, DL, VT, N0); |
15827 | 0 | } |
15828 | | |
15829 | 0 | return SDValue(); |
15830 | 0 | } |
15831 | 753k | } |
15832 | | |
15833 | 233k | return SDValue(); |
15834 | 753k | } |
15835 | | |
15836 | | bool RISCVTargetLowering::shouldTransformSignedTruncationCheck( |
15837 | 0 | EVT XVT, unsigned KeptBits) const { |
15838 | | // For vectors, we don't have a preference.
15839 | 0 | if (XVT.isVector()) |
15840 | 0 | return false; |
15841 | | |
15842 | 0 | if (XVT != MVT::i32 && XVT != MVT::i64) |
15843 | 0 | return false; |
15844 | | |
15845 | | // We can use sext.w for RV64 or an srai 31 on RV32. |
15846 | 0 | if (KeptBits == 32 || KeptBits == 64) |
15847 | 0 | return true; |
15848 | | |
15849 | | // With Zbb we can use sext.h/sext.b. |
15850 | 0 | return Subtarget.hasStdExtZbb() && |
15851 | 0 | ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) || |
15852 | 0 | KeptBits == 16); |
15853 | 0 | } |
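
The hook above is queried for source-level checks of the form "does x still fit in N bits after truncation". A rough illustration of the KeptBits == 16 case, which the comments say can use sext.h with Zbb; the lowering described in the comment is indicative, not taken from a build.

#include <cassert>
#include <cstdint>

// "Does x fit in a signed 16-bit integer?"  This is the kind of signed
// truncation check the hook is asked about; with Zbb, KeptBits == 16 can be
// implemented as sext.h followed by a compare against the original value.
static bool fitsInt16(int64_t x) {
  return static_cast<int16_t>(x) == x;
}

int main() {
  assert(fitsInt16(32767));
  assert(!fitsInt16(32768));
  assert(fitsInt16(-32768));
  return 0;
}
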
15854 | | |
15855 | | bool RISCVTargetLowering::isDesirableToCommuteWithShift( |
15856 | 36.0k | const SDNode *N, CombineLevel Level) const { |
15857 | 36.0k | assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || |
15858 | 36.0k | N->getOpcode() == ISD::SRL) && |
15859 | 36.0k | "Expected shift op"); |
15860 | | |
15861 | | // The following folds are only desirable if `(OP _, c1 << c2)` can be |
15862 | | // materialised in fewer instructions than `(OP _, c1)`: |
15863 | | // |
15864 | | // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) |
15865 | | // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) |
15866 | 0 | SDValue N0 = N->getOperand(0); |
15867 | 36.0k | EVT Ty = N0.getValueType(); |
15868 | 36.0k | if (Ty.isScalarInteger() && |
15869 | 36.0k | (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { |
15870 | 1.13k | auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); |
15871 | 1.13k | auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
15872 | 1.13k | if (C1 && C2) { |
15873 | 439 | const APInt &C1Int = C1->getAPIntValue(); |
15874 | 439 | APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); |
15875 | | |
15876 | | // We can materialise `c1 << c2` into an add immediate, so it's "free", |
15877 | | // and the combine should happen, to potentially allow further combines |
15878 | | // later. |
15879 | 439 | if (ShiftedC1Int.getSignificantBits() <= 64 && |
15880 | 439 | isLegalAddImmediate(ShiftedC1Int.getSExtValue())) |
15881 | 343 | return true; |
15882 | | |
15883 | | // We can materialise `c1` in an add immediate, so it's "free", and the |
15884 | | // combine should be prevented. |
15885 | 96 | if (C1Int.getSignificantBits() <= 64 && |
15886 | 96 | isLegalAddImmediate(C1Int.getSExtValue())) |
15887 | 70 | return false; |
15888 | | |
15889 | | // Neither constant will fit into an immediate, so find materialisation |
15890 | | // costs. |
15891 | 26 | int C1Cost = |
15892 | 26 | RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget, |
15893 | 26 | /*CompressionCost*/ true); |
15894 | 26 | int ShiftedC1Cost = RISCVMatInt::getIntMatCost( |
15895 | 26 | ShiftedC1Int, Ty.getSizeInBits(), Subtarget, |
15896 | 26 | /*CompressionCost*/ true); |
15897 | | |
15898 | | // Materialising `c1` is cheaper than materialising `c1 << c2`, so the |
15899 | | // combine should be prevented. |
15900 | 26 | if (C1Cost < ShiftedC1Cost) |
15901 | 18 | return false; |
15902 | 26 | } |
15903 | 1.13k | } |
15904 | 35.5k | return true; |
15905 | 36.0k | } |
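
A worked instance of the cost rule above, assuming the usual 12-bit signed add-immediate range: whether (shl (add x, c1), c2) is rewritten to (add (shl x, c2), c1 << c2) hinges on whether c1 << c2 still fits an addi immediate.

#include <cassert>
#include <cstdint>

static bool isSImm12(int64_t V) { return V >= -2048 && V <= 2047; }

int main() {
  // c1 = 37, c2 = 3: 37 << 3 == 296 still fits an addi immediate, so the
  // shifted form is "free" and the combine is allowed.
  assert(isSImm12(37) && isSImm12(37 << 3));

  // c1 = 1000, c2 = 4: 1000 fits simm12 but 16000 does not, so the combine
  // is blocked to keep the cheap "addi x, 1000".
  assert(isSImm12(1000) && !isSImm12(1000 << 4));
  return 0;
}
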
15906 | | |
15907 | | bool RISCVTargetLowering::targetShrinkDemandedConstant( |
15908 | | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
15909 | 534k | TargetLoweringOpt &TLO) const { |
15910 | | // Delay this optimization as late as possible. |
15911 | 534k | if (!TLO.LegalOps) |
15912 | 222k | return false; |
15913 | | |
15914 | 312k | EVT VT = Op.getValueType(); |
15915 | 312k | if (VT.isVector()) |
15916 | 0 | return false; |
15917 | | |
15918 | 312k | unsigned Opcode = Op.getOpcode(); |
15919 | 312k | if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) |
15920 | 0 | return false; |
15921 | | |
15922 | 312k | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
15923 | 312k | if (!C) |
15924 | 93.1k | return false; |
15925 | | |
15926 | 219k | const APInt &Mask = C->getAPIntValue(); |
15927 | | |
15928 | | // Clear all non-demanded bits initially. |
15929 | 219k | APInt ShrunkMask = Mask & DemandedBits; |
15930 | | |
15931 | | // Try to make a smaller immediate by setting undemanded bits. |
15932 | | |
15933 | 219k | APInt ExpandedMask = Mask | ~DemandedBits; |
15934 | | |
15935 | 219k | auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { |
15936 | 46.0k | return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); |
15937 | 46.0k | }; |
15938 | 219k | auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { |
15939 | 21.0k | if (NewMask == Mask) |
15940 | 19.8k | return true; |
15941 | 1.18k | SDLoc DL(Op); |
15942 | 1.18k | SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType()); |
15943 | 1.18k | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), |
15944 | 1.18k | Op.getOperand(0), NewC); |
15945 | 1.18k | return TLO.CombineTo(Op, NewOp); |
15946 | 21.0k | }; |
15947 | | |
15948 | | // If the shrunk mask fits in sign extended 12 bits, let the target |
15949 | | // independent code apply it. |
15950 | 219k | if (ShrunkMask.isSignedIntN(12)) |
15951 | 191k | return false; |
15952 | | |
15953 | | // And has a few special cases for zext. |
15954 | 28.0k | if (Opcode == ISD::AND) { |
15955 | | // Preserve (and X, 0xffff), if zext.h exists use zext.h, |
15956 | | // otherwise use SLLI + SRLI. |
15957 | 24.1k | APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); |
15958 | 24.1k | if (IsLegalMask(NewMask)) |
15959 | 6.66k | return UseMask(NewMask); |
15960 | | |
15961 | | // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. |
15962 | 17.5k | if (VT == MVT::i64) { |
15963 | 17.5k | APInt NewMask = APInt(64, 0xffffffff); |
15964 | 17.5k | if (IsLegalMask(NewMask)) |
15965 | 9.99k | return UseMask(NewMask); |
15966 | 17.5k | } |
15967 | 17.5k | } |
15968 | | |
15969 | | // For the remaining optimizations, we need to be able to make a negative |
15970 | | // number through a combination of mask and undemanded bits. |
15971 | 11.4k | if (!ExpandedMask.isNegative()) |
15972 | 3.25k | return false; |
15973 | | |
15974 | | // Determine the fewest number of bits needed to represent the negative number.
15975 | 8.17k | unsigned MinSignedBits = ExpandedMask.getSignificantBits(); |
15976 | | |
15977 | | // Try to make a 12 bit negative immediate. If that fails try to make a 32 |
15978 | | // bit negative immediate unless the shrunk immediate already fits in 32 bits. |
15979 | | // If we can't create a simm12, we shouldn't change opaque constants. |
15980 | 8.17k | APInt NewMask = ShrunkMask; |
15981 | 8.17k | if (MinSignedBits <= 12) |
15982 | 2.96k | NewMask.setBitsFrom(11); |
15983 | 5.21k | else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) |
15984 | 1.41k | NewMask.setBitsFrom(31); |
15985 | 3.79k | else |
15986 | 3.79k | return false; |
15987 | | |
15988 | | // Check that our new mask is a subset of the demanded mask. |
15989 | 4.37k | assert(IsLegalMask(NewMask)); |
15990 | 0 | return UseMask(NewMask); |
15991 | 8.17k | } |
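
A worked example of the zext.h special case above, using plain 64-bit masks: with mask 0x1FFFF and only the low 16 bits demanded, 0xFFFF contains all of the shrunk mask and is contained in mask | ~demanded, so the AND can be rewritten to one that zext.h (or SLLI+SRLI without Zbb) implements.

#include <cassert>
#include <cstdint>

static bool isSubsetOf(uint64_t A, uint64_t B) { return (A & ~B) == 0; }

int main() {
  const uint64_t Mask = 0x1FFFF;         // original AND immediate
  const uint64_t Demanded = 0xFFFF;      // caller only uses the low 16 bits
  const uint64_t Shrunk = Mask & Demanded;     // 0xFFFF
  const uint64_t Expanded = Mask | ~Demanded;  // undemanded bits are free

  // 0xFFFF is a legal replacement here, so (and X, 0x1FFFF) can become
  // (and X, 0xFFFF), i.e. a single zext.h.
  const uint64_t NewMask = 0xFFFF;
  assert(isSubsetOf(Shrunk, NewMask) && isSubsetOf(NewMask, Expanded));
  return 0;
}
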
15992 | | |
15993 | 0 | static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { |
15994 | 0 | static const uint64_t GREVMasks[] = { |
15995 | 0 | 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, |
15996 | 0 | 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; |
15997 | |
15998 | 0 | for (unsigned Stage = 0; Stage != 6; ++Stage) { |
15999 | 0 | unsigned Shift = 1 << Stage; |
16000 | 0 | if (ShAmt & Shift) { |
16001 | 0 | uint64_t Mask = GREVMasks[Stage]; |
16002 | 0 | uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); |
16003 | 0 | if (IsGORC) |
16004 | 0 | Res |= x; |
16005 | 0 | x = Res; |
16006 | 0 | } |
16007 | 0 | } |
16008 | |
16009 | 0 | return x; |
16010 | 0 | } |
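
A standalone copy of the staged butterfly above with a couple of sanity checks; with a control value of 7 it reproduces brev8 (reverse the bits inside each byte) and orc.b (each nonzero byte becomes 0xFF), matching the FIXME comment in the known-bits code below.

#include <cassert>
#include <cstdint>

// Same staged butterfly as computeGREVOrGORC above.
static uint64_t grevOrGorc(uint64_t x, unsigned ShAmt, bool IsGORC) {
  static const uint64_t Masks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = Masks[Stage];
      uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
      x = IsGORC ? (Res | x) : Res;
    }
  }
  return x;
}

int main() {
  // brev8: reverse the bits inside every byte (control value 7).
  assert(grevOrGorc(0x0102030400000001ULL, 7, /*IsGORC=*/false) ==
         0x8040c02000000080ULL);
  // orc.b: every nonzero byte becomes 0xFF, zero bytes stay zero.
  assert(grevOrGorc(0x0102030400000001ULL, 7, /*IsGORC=*/true) ==
         0xffffffff000000ffULL);
  return 0;
}
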
16011 | | |
16012 | | void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, |
16013 | | KnownBits &Known, |
16014 | | const APInt &DemandedElts, |
16015 | | const SelectionDAG &DAG, |
16016 | 12.4k | unsigned Depth) const { |
16017 | 12.4k | unsigned BitWidth = Known.getBitWidth(); |
16018 | 12.4k | unsigned Opc = Op.getOpcode(); |
16019 | 12.4k | assert((Opc >= ISD::BUILTIN_OP_END || |
16020 | 12.4k | Opc == ISD::INTRINSIC_WO_CHAIN || |
16021 | 12.4k | Opc == ISD::INTRINSIC_W_CHAIN || |
16022 | 12.4k | Opc == ISD::INTRINSIC_VOID) && |
16023 | 12.4k | "Should use MaskedValueIsZero if you don't know whether Op" |
16024 | 12.4k | " is a target node!"); |
16025 | | |
16026 | 0 | Known.resetAll(); |
16027 | 12.4k | switch (Opc) { |
16028 | 8.28k | default: break; |
16029 | 8.28k | case RISCVISD::SELECT_CC: { |
16030 | 0 | Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); |
16031 | | // If we don't know any bits, early out. |
16032 | 0 | if (Known.isUnknown()) |
16033 | 0 | break; |
16034 | 0 | KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); |
16035 | | |
16036 | | // Only known if known in both the LHS and RHS. |
16037 | 0 | Known = Known.intersectWith(Known2); |
16038 | 0 | break; |
16039 | 0 | } |
16040 | 0 | case RISCVISD::CZERO_EQZ: |
16041 | 0 | case RISCVISD::CZERO_NEZ: |
16042 | 0 | Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); |
16043 | | // Result is either all zero or operand 0. We can propagate zeros, but not |
16044 | | // ones. |
16045 | 0 | Known.One.clearAllBits(); |
16046 | 0 | break; |
16047 | 0 | case RISCVISD::REMUW: { |
16048 | 0 | KnownBits Known2; |
16049 | 0 | Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
16050 | 0 | Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
16051 | | // We only care about the lower 32 bits. |
16052 | 0 | Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); |
16053 | | // Restore the original width by sign extending. |
16054 | 0 | Known = Known.sext(BitWidth); |
16055 | 0 | break; |
16056 | 0 | } |
16057 | 0 | case RISCVISD::DIVUW: { |
16058 | 0 | KnownBits Known2; |
16059 | 0 | Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
16060 | 0 | Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
16061 | | // We only care about the lower 32 bits. |
16062 | 0 | Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); |
16063 | | // Restore the original width by sign extending. |
16064 | 0 | Known = Known.sext(BitWidth); |
16065 | 0 | break; |
16066 | 0 | } |
16067 | 4.19k | case RISCVISD::SLLW: { |
16068 | 4.19k | KnownBits Known2; |
16069 | 4.19k | Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
16070 | 4.19k | Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
16071 | 4.19k | Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32)); |
16072 | | // Restore the original width by sign extending. |
16073 | 4.19k | Known = Known.sext(BitWidth); |
16074 | 4.19k | break; |
16075 | 0 | } |
16076 | 0 | case RISCVISD::CTZW: { |
16077 | 0 | KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); |
16078 | 0 | unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); |
16079 | 0 | unsigned LowBits = llvm::bit_width(PossibleTZ); |
16080 | 0 | Known.Zero.setBitsFrom(LowBits); |
16081 | 0 | break; |
16082 | 0 | } |
16083 | 0 | case RISCVISD::CLZW: { |
16084 | 0 | KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); |
16085 | 0 | unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros(); |
16086 | 0 | unsigned LowBits = llvm::bit_width(PossibleLZ); |
16087 | 0 | Known.Zero.setBitsFrom(LowBits); |
16088 | 0 | break; |
16089 | 0 | } |
16090 | 0 | case RISCVISD::BREV8: |
16091 | 0 | case RISCVISD::ORC_B: { |
16092 | | // FIXME: This is based on the non-ratified Zbp GREV and GORC where a |
16093 | | // control value of 7 is equivalent to brev8 and orc.b. |
16094 | 0 | Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); |
16095 | 0 | bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B; |
16096 | | // To compute zeros, we need to invert the value and invert it back after. |
16097 | 0 | Known.Zero = |
16098 | 0 | ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC); |
16099 | 0 | Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC); |
16100 | 0 | break; |
16101 | 0 | } |
16102 | 0 | case RISCVISD::READ_VLENB: { |
16103 | | // We can use the minimum and maximum VLEN values to bound VLENB. We |
16104 | | // know VLEN must be a power of two. |
16105 | 0 | const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; |
16106 | 0 | const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; |
16107 | 0 | assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?"); |
16108 | 0 | Known.Zero.setLowBits(Log2_32(MinVLenB)); |
16109 | 0 | Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1); |
16110 | 0 | if (MaxVLenB == MinVLenB) |
16111 | 0 | Known.One.setBit(Log2_32(MinVLenB)); |
16112 | 0 | break; |
16113 | 0 | } |
16114 | 0 | case RISCVISD::FCLASS: { |
16115 | | // fclass will only set one of the low 10 bits. |
16116 | 0 | Known.Zero.setBitsFrom(10); |
16117 | 0 | break; |
16118 | 0 | } |
16119 | 0 | case ISD::INTRINSIC_W_CHAIN: |
16120 | 0 | case ISD::INTRINSIC_WO_CHAIN: { |
16121 | 0 | unsigned IntNo = |
16122 | 0 | Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1); |
16123 | 0 | switch (IntNo) { |
16124 | 0 | default: |
16125 | | // We can't do anything for most intrinsics. |
16126 | 0 | break; |
16127 | 0 | case Intrinsic::riscv_vsetvli: |
16128 | 0 | case Intrinsic::riscv_vsetvlimax: { |
16129 | 0 | bool HasAVL = IntNo == Intrinsic::riscv_vsetvli; |
16130 | 0 | unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1); |
16131 | 0 | RISCVII::VLMUL VLMUL = |
16132 | 0 | static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2)); |
16133 | 0 | unsigned SEW = RISCVVType::decodeVSEW(VSEW); |
16134 | 0 | auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL); |
16135 | 0 | uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW; |
16136 | 0 | MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul; |
16137 | | |
16138 | | // Result of vsetvli must be not larger than AVL. |
16139 | 0 | if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1))) |
16140 | 0 | MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1)); |
16141 | |
16142 | 0 | unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1; |
16143 | 0 | if (BitWidth > KnownZeroFirstBit) |
16144 | 0 | Known.Zero.setBitsFrom(KnownZeroFirstBit); |
16145 | 0 | break; |
16146 | 0 | } |
16147 | 0 | } |
16148 | 0 | break; |
16149 | 0 | } |
16150 | 12.4k | } |
16151 | 12.4k | } |
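
A worked instance of the vsetvli bound above, under an assumed maximum VLEN of 512: with SEW=32 and LMUL=2 the largest VL the instruction can return is 512/32*2 = 32, so every bit from position log2(32)+1 = 6 upwards is known zero (a constant AVL tightens the bound further, as the code does with std::min).

#include <cassert>
#include <cstdint>

// Highest value vsetvli can return for an assumed maximum VLEN (in bits).
static uint64_t maxVL(uint64_t MaxVLenBits, unsigned SEW, unsigned LMul,
                      bool Fractional) {
  uint64_t VL = MaxVLenBits / SEW;
  return Fractional ? VL / LMul : VL * LMul;
}

int main() {
  // VLEN <= 512, e32, m2  -> VL <= 32, so bits [6, XLEN) are known zero.
  assert(maxVL(512, 32, 2, /*Fractional=*/false) == 32);
  // VLEN <= 512, e64, mf2 -> VL <= 4, so bits [3, XLEN) are known zero.
  assert(maxVL(512, 64, 2, /*Fractional=*/true) == 4);
  return 0;
}
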
16152 | | |
16153 | | unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( |
16154 | | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
16155 | 769 | unsigned Depth) const { |
16156 | 769 | switch (Op.getOpcode()) { |
16157 | 0 | default: |
16158 | 0 | break; |
16159 | 0 | case RISCVISD::SELECT_CC: { |
16160 | 0 | unsigned Tmp = |
16161 | 0 | DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); |
16162 | 0 | if (Tmp == 1) return 1; // Early out. |
16163 | 0 | unsigned Tmp2 = |
16164 | 0 | DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); |
16165 | 0 | return std::min(Tmp, Tmp2); |
16166 | 0 | } |
16167 | 0 | case RISCVISD::CZERO_EQZ: |
16168 | 0 | case RISCVISD::CZERO_NEZ: |
16169 | | // Output is either all zero or operand 0. We can propagate sign bit count |
16170 | | // from operand 0. |
16171 | 0 | return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); |
16172 | 0 | case RISCVISD::ABSW: { |
16173 | | // We expand this at isel to negw+max. The result will have 33 sign bits |
16174 | | // if the input has at least 33 sign bits. |
16175 | 0 | unsigned Tmp = |
16176 | 0 | DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); |
16177 | 0 | if (Tmp < 33) return 1; |
16178 | 0 | return 33; |
16179 | 0 | } |
16180 | 115 | case RISCVISD::SLLW: |
16181 | 404 | case RISCVISD::SRAW: |
16182 | 769 | case RISCVISD::SRLW: |
16183 | 769 | case RISCVISD::DIVW: |
16184 | 769 | case RISCVISD::DIVUW: |
16185 | 769 | case RISCVISD::REMUW: |
16186 | 769 | case RISCVISD::ROLW: |
16187 | 769 | case RISCVISD::RORW: |
16188 | 769 | case RISCVISD::FCVT_W_RV64: |
16189 | 769 | case RISCVISD::FCVT_WU_RV64: |
16190 | 769 | case RISCVISD::STRICT_FCVT_W_RV64: |
16191 | 769 | case RISCVISD::STRICT_FCVT_WU_RV64: |
16192 | | // TODO: As the result is sign-extended, this is conservatively correct. A |
16193 | | // more precise answer could be calculated for SRAW depending on known |
16194 | | // bits in the shift amount. |
16195 | 769 | return 33; |
16196 | 0 | case RISCVISD::VMV_X_S: { |
16197 | | // The number of sign bits of the scalar result is computed by obtaining the |
16198 | | // element type of the input vector operand, subtracting its width from the |
16199 | | // XLEN, and then adding one (sign bit within the element type). If the |
16200 | | // element type is wider than XLen, the least-significant XLEN bits are |
16201 | | // taken. |
16202 | 0 | unsigned XLen = Subtarget.getXLen(); |
16203 | 0 | unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits(); |
16204 | 0 | if (EltBits <= XLen) |
16205 | 0 | return XLen - EltBits + 1; |
16206 | 0 | break; |
16207 | 0 | } |
16208 | 0 | case ISD::INTRINSIC_W_CHAIN: { |
16209 | 0 | unsigned IntNo = Op.getConstantOperandVal(1); |
16210 | 0 | switch (IntNo) { |
16211 | 0 | default: |
16212 | 0 | break; |
16213 | 0 | case Intrinsic::riscv_masked_atomicrmw_xchg_i64: |
16214 | 0 | case Intrinsic::riscv_masked_atomicrmw_add_i64: |
16215 | 0 | case Intrinsic::riscv_masked_atomicrmw_sub_i64: |
16216 | 0 | case Intrinsic::riscv_masked_atomicrmw_nand_i64: |
16217 | 0 | case Intrinsic::riscv_masked_atomicrmw_max_i64: |
16218 | 0 | case Intrinsic::riscv_masked_atomicrmw_min_i64: |
16219 | 0 | case Intrinsic::riscv_masked_atomicrmw_umax_i64: |
16220 | 0 | case Intrinsic::riscv_masked_atomicrmw_umin_i64: |
16221 | 0 | case Intrinsic::riscv_masked_cmpxchg_i64: |
16222 | | // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated |
16223 | | // narrow atomic operation. These are implemented using atomic |
16224 | | // operations at the minimum supported atomicrmw/cmpxchg width whose |
16225 | | // result is then sign extended to XLEN. With +A, the minimum width is |
16226 | | // 32 for both RV64 and RV32.
16227 | 0 | assert(Subtarget.getXLen() == 64); |
16228 | 0 | assert(getMinCmpXchgSizeInBits() == 32); |
16229 | 0 | assert(Subtarget.hasStdExtA()); |
16230 | 0 | return 33; |
16231 | 0 | } |
16232 | 0 | break; |
16233 | 0 | } |
16234 | 769 | } |
16235 | | |
16236 | 0 | return 1; |
16237 | 769 | } |
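
The *W opcodes handled above always produce a value sign-extended from bit 31, so at least 33 of the 64 result bits are copies of the sign bit. A small check of that property on sample values; numSignBits mirrors the scalar notion used by ComputeNumSignBits, and the sample results stand in for what an op such as SRAW or SRLW would produce.

#include <cassert>
#include <cstdint>

// Number of leading bits equal to the sign bit, including the sign bit itself.
static unsigned numSignBits(int64_t V) {
  unsigned N = 1;
  for (int Bit = 62; Bit >= 0 && (((V >> Bit) & 1) == ((V >> 63) & 1)); --Bit)
    ++N;
  return N;
}

int main() {
  // A W-form result is the sign extension of a 32-bit value.
  int64_t R1 = (int64_t)(int32_t)0x80000000u; // 0xFFFFFFFF80000000
  int64_t R2 = (int64_t)(int32_t)0x00000001u; // 0x0000000000000001
  assert(numSignBits(R1) >= 33);
  assert(numSignBits(R2) >= 33);
  return 0;
}
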
16238 | | |
16239 | | const Constant * |
16240 | 1.08M | RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { |
16241 | 1.08M | assert(Ld && "Unexpected null LoadSDNode"); |
16242 | 1.08M | if (!ISD::isNormalLoad(Ld)) |
16243 | 807k | return nullptr; |
16244 | | |
16245 | 282k | SDValue Ptr = Ld->getBasePtr(); |
16246 | | |
16247 | | // Only constant pools with no offset are supported. |
16248 | 282k | auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { |
16249 | 33.1k | auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); |
16250 | 33.1k | if (!CNode || CNode->isMachineConstantPoolEntry() || |
16251 | 33.1k | CNode->getOffset() != 0) |
16252 | 33.1k | return nullptr; |
16253 | | |
16254 | 0 | return CNode; |
16255 | 33.1k | }; |
16256 | | |
16257 | | // Simple case, LLA. |
16258 | 282k | if (Ptr.getOpcode() == RISCVISD::LLA) { |
16259 | 0 | auto *CNode = GetSupportedConstantPool(Ptr); |
16260 | 0 | if (!CNode || CNode->getTargetFlags() != 0) |
16261 | 0 | return nullptr; |
16262 | | |
16263 | 0 | return CNode->getConstVal(); |
16264 | 0 | } |
16265 | | |
16266 | | // Look for a HI and ADD_LO pair. |
16267 | 282k | if (Ptr.getOpcode() != RISCVISD::ADD_LO || |
16268 | 282k | Ptr.getOperand(0).getOpcode() != RISCVISD::HI) |
16269 | 265k | return nullptr; |
16270 | | |
16271 | 16.5k | auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1)); |
16272 | 16.5k | auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0)); |
16273 | | |
16274 | 16.5k | if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || |
16275 | 16.5k | !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) |
16276 | 16.5k | return nullptr; |
16277 | | |
16278 | 0 | if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) |
16279 | 0 | return nullptr; |
16280 | | |
16281 | 0 | return CNodeLo->getConstVal(); |
16282 | 0 | } |
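
The HI/ADD_LO pair matched above is the lui+addi form of materializing an address. A quick check of the usual %hi/%lo split, where %hi is rounded up by 0x800 so that the sign-extended 12-bit %lo brings the sum back to the exact address; the addresses used here are purely illustrative.

#include <cassert>
#include <cstdint>

// %hi/%lo split used by lui+addi (RISCVISD::HI / RISCVISD::ADD_LO).
static uint64_t hiPart(uint64_t Addr) { return (Addr + 0x800) >> 12; }
static int64_t loPart(uint64_t Addr) {
  int64_t Lo = static_cast<int64_t>(Addr & 0xFFF);
  return Lo >= 0x800 ? Lo - 0x1000 : Lo; // sign-extend the low 12 bits
}

int main() {
  for (uint64_t Addr : {0x12345678ull, 0x12345FFFull, 0x00000800ull}) {
    int64_t Lo = loPart(Addr);
    assert(Lo >= -2048 && Lo <= 2047);
    assert((hiPart(Addr) << 12) + Lo == Addr);
  }
  return 0;
}
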
16283 | | |
16284 | | static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, |
16285 | 0 | MachineBasicBlock *BB) { |
16286 | 0 | assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); |
16287 | | |
16288 | | // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. |
16289 | | // Should the count have wrapped while it was being read, we need to try |
16290 | | // again. |
16291 | | // ... |
16292 | | // read: |
16293 | | // rdcycleh x3 # load high word of cycle |
16294 | | // rdcycle x2 # load low word of cycle |
16295 | | // rdcycleh x4 # load high word of cycle |
16296 | | // bne x3, x4, read # check if high word reads match, otherwise try again |
16297 | | // ... |
16298 | | |
16299 | 0 | MachineFunction &MF = *BB->getParent(); |
16300 | 0 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
16301 | 0 | MachineFunction::iterator It = ++BB->getIterator(); |
16302 | |
16303 | 0 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); |
16304 | 0 | MF.insert(It, LoopMBB); |
16305 | |
16306 | 0 | MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); |
16307 | 0 | MF.insert(It, DoneMBB); |
16308 | | |
16309 | | // Transfer the remainder of BB and its successor edges to DoneMBB. |
16310 | 0 | DoneMBB->splice(DoneMBB->begin(), BB, |
16311 | 0 | std::next(MachineBasicBlock::iterator(MI)), BB->end()); |
16312 | 0 | DoneMBB->transferSuccessorsAndUpdatePHIs(BB); |
16313 | |
16314 | 0 | BB->addSuccessor(LoopMBB); |
16315 | |
16316 | 0 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
16317 | 0 | Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); |
16318 | 0 | Register LoReg = MI.getOperand(0).getReg(); |
16319 | 0 | Register HiReg = MI.getOperand(1).getReg(); |
16320 | 0 | DebugLoc DL = MI.getDebugLoc(); |
16321 | |
16322 | 0 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
16323 | 0 | BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) |
16324 | 0 | .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) |
16325 | 0 | .addReg(RISCV::X0); |
16326 | 0 | BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) |
16327 | 0 | .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) |
16328 | 0 | .addReg(RISCV::X0); |
16329 | 0 | BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) |
16330 | 0 | .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) |
16331 | 0 | .addReg(RISCV::X0); |
16332 | |
16333 | 0 | BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) |
16334 | 0 | .addReg(HiReg) |
16335 | 0 | .addReg(ReadAgainReg) |
16336 | 0 | .addMBB(LoopMBB); |
16337 | |
16338 | 0 | LoopMBB->addSuccessor(LoopMBB); |
16339 | 0 | LoopMBB->addSuccessor(DoneMBB); |
16340 | |
16341 | 0 | MI.eraseFromParent(); |
16342 | |
16343 | 0 | return DoneMBB; |
16344 | 0 | } |
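
The retry loop emitted above is the standard way to read a 64-bit counter through two 32-bit CSR reads: if the high half changed while reading, the low half wrapped, so read again. A C-level sketch of the same idea; read_cycle/read_cycleh are stand-ins for the rdcycle/rdcycleh reads, backed here by a fake counter so the logic can be exercised off-target.

#include <cassert>
#include <cstdint>

// Fake counter chosen so the low half wraps during the first read attempt.
static uint64_t FakeCycles = 0xFFFFFFFFull;
static uint32_t read_cycle()  { return static_cast<uint32_t>(FakeCycles++); }
static uint32_t read_cycleh() { return static_cast<uint32_t>(FakeCycles >> 32); }

// Mirror of the ReadCycleWide loop: re-read if the high half changed between
// the two high-word reads.
static uint64_t readCycle64() {
  uint32_t Hi, Lo;
  do {
    Hi = read_cycleh();
    Lo = read_cycle();
  } while (read_cycleh() != Hi);
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  // The first attempt observes the wrap and retries, yielding a consistent
  // 64-bit value rather than a torn one.
  assert(readCycle64() == 0x100000000ull);
  return 0;
}
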
16345 | | |
16346 | | static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, |
16347 | | MachineBasicBlock *BB, |
16348 | 0 | const RISCVSubtarget &Subtarget) { |
16349 | 0 | assert((MI.getOpcode() == RISCV::SplitF64Pseudo || |
16350 | 0 | MI.getOpcode() == RISCV::SplitF64Pseudo_INX) && |
16351 | 0 | "Unexpected instruction"); |
16352 | | |
16353 | 0 | MachineFunction &MF = *BB->getParent(); |
16354 | 0 | DebugLoc DL = MI.getDebugLoc(); |
16355 | 0 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
16356 | 0 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
16357 | 0 | Register LoReg = MI.getOperand(0).getReg(); |
16358 | 0 | Register HiReg = MI.getOperand(1).getReg(); |
16359 | 0 | Register SrcReg = MI.getOperand(2).getReg(); |
16360 | |
16361 | 0 | const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX |
16362 | 0 | ? &RISCV::GPRPairRegClass |
16363 | 0 | : &RISCV::FPR64RegClass; |
16364 | 0 | int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); |
16365 | |
16366 | 0 | TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, |
16367 | 0 | RI, Register()); |
16368 | 0 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); |
16369 | 0 | MachineMemOperand *MMOLo = |
16370 | 0 | MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); |
16371 | 0 | MachineMemOperand *MMOHi = MF.getMachineMemOperand( |
16372 | 0 | MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); |
16373 | 0 | BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) |
16374 | 0 | .addFrameIndex(FI) |
16375 | 0 | .addImm(0) |
16376 | 0 | .addMemOperand(MMOLo); |
16377 | 0 | BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) |
16378 | 0 | .addFrameIndex(FI) |
16379 | 0 | .addImm(4) |
16380 | 0 | .addMemOperand(MMOHi); |
16381 | 0 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
16382 | 0 | return BB; |
16383 | 0 | } |
16384 | | |
16385 | | static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, |
16386 | | MachineBasicBlock *BB, |
16387 | 0 | const RISCVSubtarget &Subtarget) { |
16388 | 0 | assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo || |
16389 | 0 | MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) && |
16390 | 0 | "Unexpected instruction"); |
16391 | | |
16392 | 0 | MachineFunction &MF = *BB->getParent(); |
16393 | 0 | DebugLoc DL = MI.getDebugLoc(); |
16394 | 0 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
16395 | 0 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
16396 | 0 | Register DstReg = MI.getOperand(0).getReg(); |
16397 | 0 | Register LoReg = MI.getOperand(1).getReg(); |
16398 | 0 | Register HiReg = MI.getOperand(2).getReg(); |
16399 | |
16400 | 0 | const TargetRegisterClass *DstRC = |
16401 | 0 | MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass |
16402 | 0 | : &RISCV::FPR64RegClass; |
16403 | 0 | int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); |
16404 | |
16405 | 0 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); |
16406 | 0 | MachineMemOperand *MMOLo = |
16407 | 0 | MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); |
16408 | 0 | MachineMemOperand *MMOHi = MF.getMachineMemOperand( |
16409 | 0 | MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); |
16410 | 0 | BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) |
16411 | 0 | .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) |
16412 | 0 | .addFrameIndex(FI) |
16413 | 0 | .addImm(0) |
16414 | 0 | .addMemOperand(MMOLo); |
16415 | 0 | BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) |
16416 | 0 | .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) |
16417 | 0 | .addFrameIndex(FI) |
16418 | 0 | .addImm(4) |
16419 | 0 | .addMemOperand(MMOHi); |
16420 | 0 | TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register()); |
16421 | 0 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
16422 | 0 | return BB; |
16423 | 0 | } |
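
Both pseudos above move an f64 between a GPR pair and an FPR by spilling through a stack slot, with the low word at offset 0 and the high word at offset 4. The same round trip in plain C++, assuming an IEEE-754 double and a little-endian layout so the "low" register holds the low 32 bits of the bit pattern.

#include <cassert>
#include <cstdint>
#include <cstring>

// BuildPairF64: store Lo at offset 0 and Hi at offset 4, reload as a double.
static double buildPairF64(uint32_t Lo, uint32_t Hi) {
  uint32_t Slot[2] = {Lo, Hi}; // stand-in for the stack slot
  double D;
  std::memcpy(&D, Slot, sizeof(D));
  return D;
}

// SplitF64: store the double, reload the two 32-bit halves.
static void splitF64(double D, uint32_t &Lo, uint32_t &Hi) {
  uint32_t Slot[2];
  std::memcpy(Slot, &D, sizeof(D));
  Lo = Slot[0];
  Hi = Slot[1];
}

int main() {
  uint32_t Lo, Hi;
  splitF64(1.0, Lo, Hi);
  assert(Hi == 0x3FF00000u && Lo == 0u); // IEEE-754 bit pattern of 1.0
  assert(buildPairF64(Lo, Hi) == 1.0);   // the round trip is lossless
  return 0;
}
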
16424 | | |
16425 | 0 | static bool isSelectPseudo(MachineInstr &MI) { |
16426 | 0 | switch (MI.getOpcode()) { |
16427 | 0 | default: |
16428 | 0 | return false; |
16429 | 0 | case RISCV::Select_GPR_Using_CC_GPR: |
16430 | 0 | case RISCV::Select_FPR16_Using_CC_GPR: |
16431 | 0 | case RISCV::Select_FPR16INX_Using_CC_GPR: |
16432 | 0 | case RISCV::Select_FPR32_Using_CC_GPR: |
16433 | 0 | case RISCV::Select_FPR32INX_Using_CC_GPR: |
16434 | 0 | case RISCV::Select_FPR64_Using_CC_GPR: |
16435 | 0 | case RISCV::Select_FPR64INX_Using_CC_GPR: |
16436 | 0 | case RISCV::Select_FPR64IN32X_Using_CC_GPR: |
16437 | 0 | return true; |
16438 | 0 | } |
16439 | 0 | } |
16440 | | |
16441 | | static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, |
16442 | | unsigned RelOpcode, unsigned EqOpcode, |
16443 | 0 | const RISCVSubtarget &Subtarget) { |
16444 | 0 | DebugLoc DL = MI.getDebugLoc(); |
16445 | 0 | Register DstReg = MI.getOperand(0).getReg(); |
16446 | 0 | Register Src1Reg = MI.getOperand(1).getReg(); |
16447 | 0 | Register Src2Reg = MI.getOperand(2).getReg(); |
16448 | 0 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
16449 | 0 | Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass); |
16450 | 0 | const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); |
16451 | | |
16452 | | // Save the current FFLAGS. |
16453 | 0 | BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags); |
16454 | |
16455 | 0 | auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg) |
16456 | 0 | .addReg(Src1Reg) |
16457 | 0 | .addReg(Src2Reg); |
16458 | 0 | if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) |
16459 | 0 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
16460 | | |
16461 | | // Restore the FFLAGS. |
16462 | 0 | BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) |
16463 | 0 | .addReg(SavedFFlags, RegState::Kill); |
16464 | | |
16465 | | // Issue a dummy FEQ opcode to raise exception for signaling NaNs. |
16466 | 0 | auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0) |
16467 | 0 | .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill())) |
16468 | 0 | .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill())); |
16469 | 0 | if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) |
16470 | 0 | MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept); |
16471 | | |
16472 | | // Erase the pseudoinstruction. |
16473 | 0 | MI.eraseFromParent(); |
16474 | 0 | return BB; |
16475 | 0 | } |
16476 | | |
16477 | | static MachineBasicBlock * |
16478 | | EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, |
16479 | | MachineBasicBlock *ThisMBB, |
16480 | 0 | const RISCVSubtarget &Subtarget) { |
16481 | | // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5) |
16482 | | // Without this, custom-inserter would have generated: |
16483 | | // |
16484 | | // A |
16485 | | // | \ |
16486 | | // | B |
16487 | | // | / |
16488 | | // C |
16489 | | // | \ |
16490 | | // | D |
16491 | | // | / |
16492 | | // E |
16493 | | // |
16494 | | // A: X = ...; Y = ... |
16495 | | // B: empty |
16496 | | // C: Z = PHI [X, A], [Y, B] |
16497 | | // D: empty |
16498 | | // E: PHI [X, C], [Z, D] |
16499 | | // |
16500 | | // If we lower both Select_FPRX_ in a single step, we can instead generate: |
16501 | | // |
16502 | | // A |
16503 | | // | \ |
16504 | | // | C |
16505 | | // | /| |
16506 | | // |/ | |
16507 | | // | | |
16508 | | // | D |
16509 | | // | / |
16510 | | // E |
16511 | | // |
16512 | | // A: X = ...; Y = ... |
16513 | | // D: empty |
16514 | | // E: PHI [X, A], [X, C], [Y, D] |
16515 | |
16516 | 0 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
16517 | 0 | const DebugLoc &DL = First.getDebugLoc(); |
16518 | 0 | const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); |
16519 | 0 | MachineFunction *F = ThisMBB->getParent(); |
16520 | 0 | MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB); |
16521 | 0 | MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB); |
16522 | 0 | MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); |
16523 | 0 | MachineFunction::iterator It = ++ThisMBB->getIterator(); |
16524 | 0 | F->insert(It, FirstMBB); |
16525 | 0 | F->insert(It, SecondMBB); |
16526 | 0 | F->insert(It, SinkMBB); |
16527 | | |
16528 | | // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. |
16529 | 0 | SinkMBB->splice(SinkMBB->begin(), ThisMBB, |
16530 | 0 | std::next(MachineBasicBlock::iterator(First)), |
16531 | 0 | ThisMBB->end()); |
16532 | 0 | SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); |
16533 | | |
16534 | | // Fallthrough block for ThisMBB. |
16535 | 0 | ThisMBB->addSuccessor(FirstMBB); |
16536 | | // Fallthrough block for FirstMBB. |
16537 | 0 | FirstMBB->addSuccessor(SecondMBB); |
16538 | 0 | ThisMBB->addSuccessor(SinkMBB); |
16539 | 0 | FirstMBB->addSuccessor(SinkMBB); |
16540 | | // This is fallthrough. |
16541 | 0 | SecondMBB->addSuccessor(SinkMBB); |
16542 | |
16543 | 0 | auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm()); |
16544 | 0 | Register FLHS = First.getOperand(1).getReg(); |
16545 | 0 | Register FRHS = First.getOperand(2).getReg(); |
16546 | | // Insert appropriate branch. |
16547 | 0 | BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC)) |
16548 | 0 | .addReg(FLHS) |
16549 | 0 | .addReg(FRHS) |
16550 | 0 | .addMBB(SinkMBB); |
16551 | |
16552 | 0 | Register SLHS = Second.getOperand(1).getReg(); |
16553 | 0 | Register SRHS = Second.getOperand(2).getReg(); |
16554 | 0 | Register Op1Reg4 = First.getOperand(4).getReg(); |
16555 | 0 | Register Op1Reg5 = First.getOperand(5).getReg(); |
16556 | |
16557 | 0 | auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm()); |
16558 | | // Insert appropriate branch. |
16559 | 0 | BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC)) |
16560 | 0 | .addReg(SLHS) |
16561 | 0 | .addReg(SRHS) |
16562 | 0 | .addMBB(SinkMBB); |
16563 | |
16564 | 0 | Register DestReg = Second.getOperand(0).getReg(); |
16565 | 0 | Register Op2Reg4 = Second.getOperand(4).getReg(); |
16566 | 0 | BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg) |
16567 | 0 | .addReg(Op2Reg4) |
16568 | 0 | .addMBB(ThisMBB) |
16569 | 0 | .addReg(Op1Reg4) |
16570 | 0 | .addMBB(FirstMBB) |
16571 | 0 | .addReg(Op1Reg5) |
16572 | 0 | .addMBB(SecondMBB); |
16573 | | |
16574 | | // Now remove the Select_FPRX_s. |
16575 | 0 | First.eraseFromParent(); |
16576 | 0 | Second.eraseFromParent(); |
16577 | 0 | return SinkMBB; |
16578 | 0 | } |
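
For illustration, the cascaded case handled above corresponds to source where the
inner select's result feeds the outer select's false operand. A minimal, hypothetical
C++ shape (not taken from this file) that can lower to two chained
Select_FPR64_Using_CC_GPR pseudos when compiled for a hard-double-float RISC-V target:

  // Both ternaries can become FP-select pseudos on integer conditions; the
  // second select's false operand (operand 5) is the first select's result,
  // which is the pattern emitSelectPseudo below forwards to
  // EmitLoweredCascadedSelect.
  double cascadedSelects(long a, long b, long c, long d,
                         double x, double y, double z) {
    double t = (a < b) ? x : y; // first Select_*
    return (c < d) ? z : t;     // second Select_*, false operand is t
  }
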
16579 | | |
16580 | | static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, |
16581 | | MachineBasicBlock *BB, |
16582 | 0 | const RISCVSubtarget &Subtarget) { |
16583 | | // To "insert" Select_* instructions, we actually have to insert the triangle |
16584 | | // control-flow pattern. The incoming instructions know the destination vreg |
16585 | | // to set, the condition code register to branch on, the true/false values to |
16586 | | // select between, and the condcode to use to select the appropriate branch. |
16587 | | // |
16588 | | // We produce the following control flow: |
16589 | | // HeadMBB |
16590 | | // | \ |
16591 | | // | IfFalseMBB |
16592 | | // | / |
16593 | | // TailMBB |
16594 | | // |
16595 | | // When we find a sequence of selects we attempt to optimize their emission |
16596 | | // by sharing the control flow. Currently we only handle cases where we have |
16597 | | // multiple selects with the exact same condition (same LHS, RHS and CC). |
16598 | | // The selects may be interleaved with other instructions if the other |
16599 | | // instructions meet some requirements we deem safe: |
16600 | | // - They are debug instructions (these are simply skipped). Otherwise,
16601 | | // - They are not pseudo instructions that require custom insertion, they
16602 | | // do not have unmodeled side-effects, do not access memory, and their
16603 | | // inputs do not depend on the results of the select pseudo-instructions.
16604 | | // The TrueV/FalseV operands of the selects cannot depend on the result of |
16605 | | // previous selects in the sequence. |
16606 | | // These conditions could be further relaxed. See the X86 target for a |
16607 | | // related approach and more information. |
16608 | | // |
16609 | | // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)) |
16610 | | // is checked here and handled by a separate function - |
16611 | | // EmitLoweredCascadedSelect. |
16612 | 0 | Register LHS = MI.getOperand(1).getReg(); |
16613 | 0 | Register RHS = MI.getOperand(2).getReg(); |
16614 | 0 | auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); |
16615 | |
16616 | 0 | SmallVector<MachineInstr *, 4> SelectDebugValues; |
16617 | 0 | SmallSet<Register, 4> SelectDests; |
16618 | 0 | SelectDests.insert(MI.getOperand(0).getReg()); |
16619 | |
16620 | 0 | MachineInstr *LastSelectPseudo = &MI; |
16621 | 0 | auto Next = next_nodbg(MI.getIterator(), BB->instr_end()); |
16622 | 0 | if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() && |
16623 | 0 | Next->getOpcode() == MI.getOpcode() && |
16624 | 0 | Next->getOperand(5).getReg() == MI.getOperand(0).getReg() && |
16625 | 0 | Next->getOperand(5).isKill()) { |
16626 | 0 | return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget); |
16627 | 0 | } |
16628 | | |
16629 | 0 | for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); |
16630 | 0 | SequenceMBBI != E; ++SequenceMBBI) { |
16631 | 0 | if (SequenceMBBI->isDebugInstr()) |
16632 | 0 | continue; |
16633 | 0 | if (isSelectPseudo(*SequenceMBBI)) { |
16634 | 0 | if (SequenceMBBI->getOperand(1).getReg() != LHS || |
16635 | 0 | SequenceMBBI->getOperand(2).getReg() != RHS || |
16636 | 0 | SequenceMBBI->getOperand(3).getImm() != CC || |
16637 | 0 | SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || |
16638 | 0 | SelectDests.count(SequenceMBBI->getOperand(5).getReg())) |
16639 | 0 | break; |
16640 | 0 | LastSelectPseudo = &*SequenceMBBI; |
16641 | 0 | SequenceMBBI->collectDebugValues(SelectDebugValues); |
16642 | 0 | SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); |
16643 | 0 | continue; |
16644 | 0 | } |
16645 | 0 | if (SequenceMBBI->hasUnmodeledSideEffects() || |
16646 | 0 | SequenceMBBI->mayLoadOrStore() || |
16647 | 0 | SequenceMBBI->usesCustomInsertionHook()) |
16648 | 0 | break; |
16649 | 0 | if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { |
16650 | 0 | return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); |
16651 | 0 | })) |
16652 | 0 | break; |
16653 | 0 | } |
16654 | |
16655 | 0 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
16656 | 0 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
16657 | 0 | DebugLoc DL = MI.getDebugLoc(); |
16658 | 0 | MachineFunction::iterator I = ++BB->getIterator(); |
16659 | |
16660 | 0 | MachineBasicBlock *HeadMBB = BB; |
16661 | 0 | MachineFunction *F = BB->getParent(); |
16662 | 0 | MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); |
16663 | 0 | MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); |
16664 | |
16665 | 0 | F->insert(I, IfFalseMBB); |
16666 | 0 | F->insert(I, TailMBB); |
16667 | | |
16668 | | // Transfer debug instructions associated with the selects to TailMBB. |
16669 | 0 | for (MachineInstr *DebugInstr : SelectDebugValues) { |
16670 | 0 | TailMBB->push_back(DebugInstr->removeFromParent()); |
16671 | 0 | } |
16672 | | |
16673 | | // Move all instructions after the sequence to TailMBB. |
16674 | 0 | TailMBB->splice(TailMBB->end(), HeadMBB, |
16675 | 0 | std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); |
16676 | | // Update machine-CFG edges by transferring all successors of the current |
16677 | | // block to the new block which will contain the Phi nodes for the selects. |
16678 | 0 | TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); |
16679 | | // Set the successors for HeadMBB. |
16680 | 0 | HeadMBB->addSuccessor(IfFalseMBB); |
16681 | 0 | HeadMBB->addSuccessor(TailMBB); |
16682 | | |
16683 | | // Insert appropriate branch. |
16684 | 0 | BuildMI(HeadMBB, DL, TII.getBrCond(CC)) |
16685 | 0 | .addReg(LHS) |
16686 | 0 | .addReg(RHS) |
16687 | 0 | .addMBB(TailMBB); |
16688 | | |
16689 | | // IfFalseMBB just falls through to TailMBB. |
16690 | 0 | IfFalseMBB->addSuccessor(TailMBB); |
16691 | | |
16692 | | // Create PHIs for all of the select pseudo-instructions. |
16693 | 0 | auto SelectMBBI = MI.getIterator(); |
16694 | 0 | auto SelectEnd = std::next(LastSelectPseudo->getIterator()); |
16695 | 0 | auto InsertionPoint = TailMBB->begin(); |
16696 | 0 | while (SelectMBBI != SelectEnd) { |
16697 | 0 | auto Next = std::next(SelectMBBI); |
16698 | 0 | if (isSelectPseudo(*SelectMBBI)) { |
16699 | | // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] |
16700 | 0 | BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), |
16701 | 0 | TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) |
16702 | 0 | .addReg(SelectMBBI->getOperand(4).getReg()) |
16703 | 0 | .addMBB(HeadMBB) |
16704 | 0 | .addReg(SelectMBBI->getOperand(5).getReg()) |
16705 | 0 | .addMBB(IfFalseMBB); |
16706 | 0 | SelectMBBI->eraseFromParent(); |
16707 | 0 | } |
16708 | 0 | SelectMBBI = Next; |
16709 | 0 | } |
16710 | |
16711 | 0 | F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); |
16712 | 0 | return TailMBB; |
16713 | 0 | } |
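
As a companion to the sequence-sharing logic above, a hypothetical example (not from
this file) where several selects share one condition, so a single triangle and one
PHI per select suffice:

  // Both selects compare the same LHS/RHS with the same condition code, so
  // emitSelectPseudo can emit one conditional branch and two PHIs in TailMBB
  // rather than one branch triangle per select.
  void sharedCondition(long a, long b, long t0, long f0, long t1, long f1,
                       long &r0, long &r1) {
    bool c = a < b;
    r0 = c ? t0 : f0; // Select_GPR_Using_CC_GPR #1
    r1 = c ? t1 : f1; // Select_GPR_Using_CC_GPR #2, same LHS, RHS and CC
  }
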
16714 | | |
16715 | | static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, |
16716 | | MachineBasicBlock *BB, |
16717 | | unsigned CVTXOpc, |
16718 | 0 | unsigned CVTFOpc) { |
16719 | 0 | DebugLoc DL = MI.getDebugLoc(); |
16720 | |
16721 | 0 | const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); |
16722 | |
16723 | 0 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
16724 | 0 | Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass); |
16725 | | |
16726 | | // Save the old value of FFLAGS. |
16727 | 0 | BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS); |
16728 | |
16729 | 0 | assert(MI.getNumOperands() == 7); |
16730 | | |
16731 | | // Emit a VFCVT_X_F |
16732 | 0 | const TargetRegisterInfo *TRI = |
16733 | 0 | BB->getParent()->getSubtarget().getRegisterInfo(); |
16734 | 0 | const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI); |
16735 | 0 | Register Tmp = MRI.createVirtualRegister(RC); |
16736 | 0 | BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp) |
16737 | 0 | .add(MI.getOperand(1)) |
16738 | 0 | .add(MI.getOperand(2)) |
16739 | 0 | .add(MI.getOperand(3)) |
16740 | 0 | .add(MachineOperand::CreateImm(7)) // frm = DYN |
16741 | 0 | .add(MI.getOperand(4)) |
16742 | 0 | .add(MI.getOperand(5)) |
16743 | 0 | .add(MI.getOperand(6)) |
16744 | 0 | .add(MachineOperand::CreateReg(RISCV::FRM, |
16745 | 0 | /*IsDef*/ false, |
16746 | 0 | /*IsImp*/ true)); |
16747 | | |
16748 | | // Emit a VFCVT_F_X |
16749 | 0 | BuildMI(*BB, MI, DL, TII.get(CVTFOpc)) |
16750 | 0 | .add(MI.getOperand(0)) |
16751 | 0 | .add(MI.getOperand(1)) |
16752 | 0 | .addReg(Tmp) |
16753 | 0 | .add(MI.getOperand(3)) |
16754 | 0 | .add(MachineOperand::CreateImm(7)) // frm = DYN |
16755 | 0 | .add(MI.getOperand(4)) |
16756 | 0 | .add(MI.getOperand(5)) |
16757 | 0 | .add(MI.getOperand(6)) |
16758 | 0 | .add(MachineOperand::CreateReg(RISCV::FRM, |
16759 | 0 | /*IsDef*/ false, |
16760 | 0 | /*IsImp*/ true)); |
16761 | | |
16762 | | // Restore FFLAGS. |
16763 | 0 | BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) |
16764 | 0 | .addReg(SavedFFLAGS, RegState::Kill); |
16765 | | |
16766 | | // Erase the pseudoinstruction. |
16767 | 0 | MI.eraseFromParent(); |
16768 | 0 | return BB; |
16769 | 0 | } |
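
The expansion above can be read as a scalar idiom: round by converting to integer and
back under the dynamic rounding mode, while saving and restoring the accrued exception
flags so the conversions leave fflags untouched. A rough scalar sketch (assumed, using
<cfenv>/<cmath>; it ignores the mask operand and large-magnitude handling of the real
vector pseudo):

  #include <cfenv>
  #include <cmath>

  double roundNoExcept(double X) {
    std::fexcept_t Saved;
    std::fegetexceptflag(&Saved, FE_ALL_EXCEPT); // ReadFFLAGS
    long long I = std::llrint(X);                // vfcvt.x.f.v ..., frm = DYN
    double R = static_cast<double>(I);           // vfcvt.f.x.v ..., frm = DYN
    std::fesetexceptflag(&Saved, FE_ALL_EXCEPT); // WriteFFLAGS
    return R;
  }
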
16770 | | |
16771 | | static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, |
16772 | 0 | const RISCVSubtarget &Subtarget) { |
16773 | 0 | unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; |
16774 | 0 | const TargetRegisterClass *RC; |
16775 | 0 | switch (MI.getOpcode()) { |
16776 | 0 | default: |
16777 | 0 | llvm_unreachable("Unexpected opcode"); |
16778 | 0 | case RISCV::PseudoFROUND_H: |
16779 | 0 | CmpOpc = RISCV::FLT_H; |
16780 | 0 | F2IOpc = RISCV::FCVT_W_H; |
16781 | 0 | I2FOpc = RISCV::FCVT_H_W; |
16782 | 0 | FSGNJOpc = RISCV::FSGNJ_H; |
16783 | 0 | FSGNJXOpc = RISCV::FSGNJX_H; |
16784 | 0 | RC = &RISCV::FPR16RegClass; |
16785 | 0 | break; |
16786 | 0 | case RISCV::PseudoFROUND_H_INX: |
16787 | 0 | CmpOpc = RISCV::FLT_H_INX; |
16788 | 0 | F2IOpc = RISCV::FCVT_W_H_INX; |
16789 | 0 | I2FOpc = RISCV::FCVT_H_W_INX; |
16790 | 0 | FSGNJOpc = RISCV::FSGNJ_H_INX; |
16791 | 0 | FSGNJXOpc = RISCV::FSGNJX_H_INX; |
16792 | 0 | RC = &RISCV::GPRF16RegClass; |
16793 | 0 | break; |
16794 | 0 | case RISCV::PseudoFROUND_S: |
16795 | 0 | CmpOpc = RISCV::FLT_S; |
16796 | 0 | F2IOpc = RISCV::FCVT_W_S; |
16797 | 0 | I2FOpc = RISCV::FCVT_S_W; |
16798 | 0 | FSGNJOpc = RISCV::FSGNJ_S; |
16799 | 0 | FSGNJXOpc = RISCV::FSGNJX_S; |
16800 | 0 | RC = &RISCV::FPR32RegClass; |
16801 | 0 | break; |
16802 | 0 | case RISCV::PseudoFROUND_S_INX: |
16803 | 0 | CmpOpc = RISCV::FLT_S_INX; |
16804 | 0 | F2IOpc = RISCV::FCVT_W_S_INX; |
16805 | 0 | I2FOpc = RISCV::FCVT_S_W_INX; |
16806 | 0 | FSGNJOpc = RISCV::FSGNJ_S_INX; |
16807 | 0 | FSGNJXOpc = RISCV::FSGNJX_S_INX; |
16808 | 0 | RC = &RISCV::GPRF32RegClass; |
16809 | 0 | break; |
16810 | 0 | case RISCV::PseudoFROUND_D: |
16811 | 0 | assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); |
16812 | 0 | CmpOpc = RISCV::FLT_D; |
16813 | 0 | F2IOpc = RISCV::FCVT_L_D; |
16814 | 0 | I2FOpc = RISCV::FCVT_D_L; |
16815 | 0 | FSGNJOpc = RISCV::FSGNJ_D; |
16816 | 0 | FSGNJXOpc = RISCV::FSGNJX_D; |
16817 | 0 | RC = &RISCV::FPR64RegClass; |
16818 | 0 | break; |
16819 | 0 | case RISCV::PseudoFROUND_D_INX: |
16820 | 0 | assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); |
16821 | 0 | CmpOpc = RISCV::FLT_D_INX; |
16822 | 0 | F2IOpc = RISCV::FCVT_L_D_INX; |
16823 | 0 | I2FOpc = RISCV::FCVT_D_L_INX; |
16824 | 0 | FSGNJOpc = RISCV::FSGNJ_D_INX; |
16825 | 0 | FSGNJXOpc = RISCV::FSGNJX_D_INX; |
16826 | 0 | RC = &RISCV::GPRRegClass; |
16827 | 0 | break; |
16828 | 0 | } |
16829 | | |
16830 | 0 | const BasicBlock *BB = MBB->getBasicBlock(); |
16831 | 0 | DebugLoc DL = MI.getDebugLoc(); |
16832 | 0 | MachineFunction::iterator I = ++MBB->getIterator(); |
16833 | |
16834 | 0 | MachineFunction *F = MBB->getParent(); |
16835 | 0 | MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); |
16836 | 0 | MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); |
16837 | |
16838 | 0 | F->insert(I, CvtMBB); |
16839 | 0 | F->insert(I, DoneMBB); |
16840 | | // Move all instructions after the sequence to DoneMBB. |
16841 | 0 | DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI), |
16842 | 0 | MBB->end()); |
16843 | | // Update machine-CFG edges by transferring all successors of the current
16844 | | // block to the new block which will contain the PHI node merging the result.
16845 | 0 | DoneMBB->transferSuccessorsAndUpdatePHIs(MBB); |
16846 | | // Set the successors for MBB. |
16847 | 0 | MBB->addSuccessor(CvtMBB); |
16848 | 0 | MBB->addSuccessor(DoneMBB); |
16849 | |
16850 | 0 | Register DstReg = MI.getOperand(0).getReg(); |
16851 | 0 | Register SrcReg = MI.getOperand(1).getReg(); |
16852 | 0 | Register MaxReg = MI.getOperand(2).getReg(); |
16853 | 0 | int64_t FRM = MI.getOperand(3).getImm(); |
16854 | |
16855 | 0 | const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); |
16856 | 0 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
16857 | |
16858 | 0 | Register FabsReg = MRI.createVirtualRegister(RC); |
16859 | 0 | BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg); |
16860 | | |
16861 | | // Compare the FP value to the max value. |
16862 | 0 | Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); |
16863 | 0 | auto MIB = |
16864 | 0 | BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg); |
16865 | 0 | if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) |
16866 | 0 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
16867 | | |
16868 | | // Insert branch. |
16869 | 0 | BuildMI(MBB, DL, TII.get(RISCV::BEQ)) |
16870 | 0 | .addReg(CmpReg) |
16871 | 0 | .addReg(RISCV::X0) |
16872 | 0 | .addMBB(DoneMBB); |
16873 | |
16874 | 0 | CvtMBB->addSuccessor(DoneMBB); |
16875 | | |
16876 | | // Convert to integer. |
16877 | 0 | Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); |
16878 | 0 | MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM); |
16879 | 0 | if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) |
16880 | 0 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
16881 | | |
16882 | | // Convert back to FP. |
16883 | 0 | Register I2FReg = MRI.createVirtualRegister(RC); |
16884 | 0 | MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM); |
16885 | 0 | if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) |
16886 | 0 | MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); |
16887 | | |
16888 | | // Restore the sign bit. |
16889 | 0 | Register CvtReg = MRI.createVirtualRegister(RC); |
16890 | 0 | BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg); |
16891 | | |
16892 | | // Merge the results. |
16893 | 0 | BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg) |
16894 | 0 | .addReg(SrcReg) |
16895 | 0 | .addMBB(MBB) |
16896 | 0 | .addReg(CvtReg) |
16897 | 0 | .addMBB(CvtMBB); |
16898 | |
16899 | 0 | MI.eraseFromParent(); |
16900 | 0 | return DoneMBB; |
16901 | 0 | } |
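
The block structure built above computes roughly the following scalar logic per
element type (a sketch only; the real expansion passes an explicit rounding-mode
immediate where the calls below use the current mode, and MaxReg holds the threshold
above which every finite value is already integral):

  #include <cmath>

  double froundSketch(double Src, double MaxIntegral /* e.g. 0x1p52 for f64 */) {
    if (!(std::fabs(Src) < MaxIntegral))  // FSGNJX + FLT + BEQ to DoneMBB
      return Src;                         // NaN, inf or already-integral input
    long long I = std::llrint(Src);       // FCVT_L_D with the requested FRM
    double R = static_cast<double>(I);    // FCVT_D_L with the requested FRM
    return std::copysign(R, Src);         // FSGNJ restores the sign (keeps -0.0)
  }
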
16902 | | |
16903 | | MachineBasicBlock * |
16904 | | RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
16905 | 0 | MachineBasicBlock *BB) const { |
16906 | 0 | switch (MI.getOpcode()) { |
16907 | 0 | default: |
16908 | 0 | llvm_unreachable("Unexpected instr type to insert"); |
16909 | 0 | case RISCV::ReadCycleWide: |
16910 | 0 | assert(!Subtarget.is64Bit() && |
16911 | 0 | "ReadCycleWide is only to be used on riscv32");
16912 | 0 | return emitReadCycleWidePseudo(MI, BB); |
16913 | 0 | case RISCV::Select_GPR_Using_CC_GPR: |
16914 | 0 | case RISCV::Select_FPR16_Using_CC_GPR: |
16915 | 0 | case RISCV::Select_FPR16INX_Using_CC_GPR: |
16916 | 0 | case RISCV::Select_FPR32_Using_CC_GPR: |
16917 | 0 | case RISCV::Select_FPR32INX_Using_CC_GPR: |
16918 | 0 | case RISCV::Select_FPR64_Using_CC_GPR: |
16919 | 0 | case RISCV::Select_FPR64INX_Using_CC_GPR: |
16920 | 0 | case RISCV::Select_FPR64IN32X_Using_CC_GPR: |
16921 | 0 | return emitSelectPseudo(MI, BB, Subtarget); |
16922 | 0 | case RISCV::BuildPairF64Pseudo: |
16923 | 0 | case RISCV::BuildPairF64Pseudo_INX: |
16924 | 0 | return emitBuildPairF64Pseudo(MI, BB, Subtarget); |
16925 | 0 | case RISCV::SplitF64Pseudo: |
16926 | 0 | case RISCV::SplitF64Pseudo_INX: |
16927 | 0 | return emitSplitF64Pseudo(MI, BB, Subtarget); |
16928 | 0 | case RISCV::PseudoQuietFLE_H: |
16929 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget); |
16930 | 0 | case RISCV::PseudoQuietFLE_H_INX: |
16931 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget); |
16932 | 0 | case RISCV::PseudoQuietFLT_H: |
16933 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget); |
16934 | 0 | case RISCV::PseudoQuietFLT_H_INX: |
16935 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget); |
16936 | 0 | case RISCV::PseudoQuietFLE_S: |
16937 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget); |
16938 | 0 | case RISCV::PseudoQuietFLE_S_INX: |
16939 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget); |
16940 | 0 | case RISCV::PseudoQuietFLT_S: |
16941 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget); |
16942 | 0 | case RISCV::PseudoQuietFLT_S_INX: |
16943 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget); |
16944 | 0 | case RISCV::PseudoQuietFLE_D: |
16945 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget); |
16946 | 0 | case RISCV::PseudoQuietFLE_D_INX: |
16947 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget); |
16948 | 0 | case RISCV::PseudoQuietFLE_D_IN32X: |
16949 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X, |
16950 | 0 | Subtarget); |
16951 | 0 | case RISCV::PseudoQuietFLT_D: |
16952 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget); |
16953 | 0 | case RISCV::PseudoQuietFLT_D_INX: |
16954 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget); |
16955 | 0 | case RISCV::PseudoQuietFLT_D_IN32X: |
16956 | 0 | return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X, |
16957 | 0 | Subtarget); |
16958 | | |
16959 | 0 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK: |
16960 | 0 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK, |
16961 | 0 | RISCV::PseudoVFCVT_F_X_V_M1_MASK); |
16962 | 0 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK: |
16963 | 0 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK, |
16964 | 0 | RISCV::PseudoVFCVT_F_X_V_M2_MASK); |
16965 | 0 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK: |
16966 | 0 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK, |
16967 | 0 | RISCV::PseudoVFCVT_F_X_V_M4_MASK); |
16968 | 0 | case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK: |
16969 | 0 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK, |
16970 | 0 | RISCV::PseudoVFCVT_F_X_V_M8_MASK); |
16971 | 0 | case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK: |
16972 | 0 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK, |
16973 | 0 | RISCV::PseudoVFCVT_F_X_V_MF2_MASK); |
16974 | 0 | case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK: |
16975 | 0 | return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK, |
16976 | 0 | RISCV::PseudoVFCVT_F_X_V_MF4_MASK); |
16977 | 0 | case RISCV::PseudoFROUND_H: |
16978 | 0 | case RISCV::PseudoFROUND_H_INX: |
16979 | 0 | case RISCV::PseudoFROUND_S: |
16980 | 0 | case RISCV::PseudoFROUND_S_INX: |
16981 | 0 | case RISCV::PseudoFROUND_D: |
16982 | 0 | case RISCV::PseudoFROUND_D_INX: |
16983 | 0 | case RISCV::PseudoFROUND_D_IN32X: |
16984 | 0 | return emitFROUND(MI, BB, Subtarget); |
16985 | 0 | case TargetOpcode::STATEPOINT: |
16986 | 0 | case TargetOpcode::STACKMAP: |
16987 | 0 | case TargetOpcode::PATCHPOINT: |
16988 | 0 | if (!Subtarget.is64Bit()) |
16989 | 0 | report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only " |
16990 | 0 | "supported on 64-bit targets"); |
16991 | 0 | return emitPatchPoint(MI, BB); |
16992 | 0 | } |
16993 | 0 | } |
16994 | | |
16995 | | void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, |
16996 | 0 | SDNode *Node) const { |
16997 | | // Add FRM dependency to any instructions with dynamic rounding mode. |
16998 | 0 | int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm); |
16999 | 0 | if (Idx < 0) { |
17000 | | // Vector pseudos have FRM index indicated by TSFlags. |
17001 | 0 | Idx = RISCVII::getFRMOpNum(MI.getDesc()); |
17002 | 0 | if (Idx < 0) |
17003 | 0 | return; |
17004 | 0 | } |
17005 | 0 | if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN) |
17006 | 0 | return; |
17007 | | // If the instruction already reads FRM, don't add another read. |
17008 | 0 | if (MI.readsRegister(RISCV::FRM)) |
17009 | 0 | return; |
17010 | 0 | MI.addOperand( |
17011 | 0 | MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true)); |
17012 | 0 | } |
17013 | | |
17014 | | // Calling Convention Implementation. |
17015 | | // The expectations for frontend ABI lowering vary from target to target. |
17016 | | // Ideally, an LLVM frontend would be able to avoid worrying about many ABI |
17017 | | // details, but this is a longer term goal. For now, we simply try to keep the |
17018 | | // role of the frontend as simple and well-defined as possible. The rules can |
17019 | | // be summarised as: |
17020 | | // * Never split up large scalar arguments. We handle them here. |
17021 | | // * If a hardfloat calling convention is being used, and the struct may be |
17022 | | // passed in a pair of registers (fp+fp, int+fp), and both registers are |
17023 | | // available, then pass as two separate arguments. If either the GPRs or FPRs |
17024 | | // are exhausted, then pass according to the rule below. |
17025 | | // * If a struct could never be passed in registers or directly in a stack |
17026 | | // slot (as it is larger than 2*XLEN and the floating point rules don't |
17027 | | // apply), then pass it using a pointer with the byval attribute. |
17028 | | // * If a struct is less than 2*XLEN, then coerce to either a two-element |
17029 | | // word-sized array or a 2*XLEN scalar (depending on alignment). |
17030 | | // * The frontend can determine whether a struct is returned by reference or |
17031 | | // not based on its size and fields. If it will be returned by reference, the |
17032 | | // frontend must modify the prototype so a pointer with the sret annotation is |
17033 | | // passed as the first argument. This is not necessary for large scalar |
17034 | | // returns. |
17035 | | // * Struct return values and varargs should be coerced to structs containing |
17036 | | // register-size fields in the same situations they would be for fixed |
17037 | | // arguments. |
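
A hedged illustration of those rules, assuming the LP64D ABI (XLEN = 64, hardware
double-float); the precise coercions are the frontend's choice and are only
approximated here:

  #include <cstdint>

  struct TwoInts { int32_t A, B; };        // 8 bytes <= 2*XLEN: coerced to a single XLEN scalar
  struct FpInt   { double D; int64_t I; }; // fp+int pair, 2*XLEN total: FPR + GPR if both free
  struct TooBig  { int64_t V[5]; };        // > 2*XLEN, FP rules don't apply: pointer with byval

  // Roughly: S arrives coerced in a0, P in fa0 plus a1, B via a pointer in a2.
  int64_t takeAll(TwoInts S, FpInt P, TooBig B);
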
17038 | | |
17039 | | static const MCPhysReg ArgFPR16s[] = { |
17040 | | RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, |
17041 | | RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H |
17042 | | }; |
17043 | | static const MCPhysReg ArgFPR32s[] = { |
17044 | | RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, |
17045 | | RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F |
17046 | | }; |
17047 | | static const MCPhysReg ArgFPR64s[] = { |
17048 | | RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, |
17049 | | RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D |
17050 | | }; |
17051 | | // This is an interim calling convention and it may be changed in the future. |
17052 | | static const MCPhysReg ArgVRs[] = { |
17053 | | RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, |
17054 | | RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, |
17055 | | RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; |
17056 | | static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, |
17057 | | RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, |
17058 | | RISCV::V20M2, RISCV::V22M2}; |
17059 | | static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, |
17060 | | RISCV::V20M4}; |
17061 | | static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; |
17062 | | |
17063 | 195k | ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) { |
17064 | | // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except |
17065 | | // the ILP32E ABI. |
17066 | 195k | static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
17067 | 195k | RISCV::X13, RISCV::X14, RISCV::X15, |
17068 | 195k | RISCV::X16, RISCV::X17}; |
17069 | | // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
17070 | 195k | static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
17071 | 195k | RISCV::X13, RISCV::X14, RISCV::X15}; |
17072 | | |
17073 | 195k | if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) |
17074 | 0 | return ArrayRef(ArgEGPRs); |
17075 | | |
17076 | 195k | return ArrayRef(ArgIGPRs); |
17077 | 195k | } |
17078 | | |
17079 | 0 | static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { |
17080 | | // The GPRs used for passing arguments in FastCC. X5 and X6 might be used by
17081 | | // the save-restore libcalls, so we don't use them.
17082 | 0 | static const MCPhysReg FastCCIGPRs[] = { |
17083 | 0 | RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, |
17084 | 0 | RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, |
17085 | 0 | RISCV::X29, RISCV::X30, RISCV::X31}; |
17086 | | |
17087 | | // The GPRs used for passing arguments in FastCC when using ILP32E/LP64E.
17088 | 0 | static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, |
17089 | 0 | RISCV::X13, RISCV::X14, RISCV::X15, |
17090 | 0 | RISCV::X7}; |
17091 | |
|
17092 | 0 | if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) |
17093 | 0 | return ArrayRef(FastCCEGPRs); |
17094 | | |
17095 | 0 | return ArrayRef(FastCCIGPRs); |
17096 | 0 | } |
17097 | | |
17098 | | // Pass a 2*XLEN argument that has been split into two XLEN values through |
17099 | | // registers or the stack as necessary. |
17100 | | static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, |
17101 | | ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, |
17102 | | MVT ValVT2, MVT LocVT2, |
17103 | 0 | ISD::ArgFlagsTy ArgFlags2, bool EABI) { |
17104 | 0 | unsigned XLenInBytes = XLen / 8; |
17105 | 0 | const RISCVSubtarget &STI = |
17106 | 0 | State.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
17107 | 0 | ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI()); |
17108 | |
17109 | 0 | if (Register Reg = State.AllocateReg(ArgGPRs)) { |
17110 | | // At least one half can be passed via register. |
17111 | 0 | State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, |
17112 | 0 | VA1.getLocVT(), CCValAssign::Full)); |
17113 | 0 | } else { |
17114 | | // Both halves must be passed on the stack, with proper alignment. |
17115 | | // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte |
17116 | | // alignment. This behavior may be changed when RV32E/ILP32E is ratified. |
17117 | 0 | Align StackAlign(XLenInBytes); |
17118 | 0 | if (!EABI || XLen != 32) |
17119 | 0 | StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign()); |
17120 | 0 | State.addLoc( |
17121 | 0 | CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), |
17122 | 0 | State.AllocateStack(XLenInBytes, StackAlign), |
17123 | 0 | VA1.getLocVT(), CCValAssign::Full)); |
17124 | 0 | State.addLoc(CCValAssign::getMem( |
17125 | 0 | ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), |
17126 | 0 | LocVT2, CCValAssign::Full)); |
17127 | 0 | return false; |
17128 | 0 | } |
17129 | | |
17130 | 0 | if (Register Reg = State.AllocateReg(ArgGPRs)) { |
17131 | | // The second half can also be passed via register. |
17132 | 0 | State.addLoc( |
17133 | 0 | CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); |
17134 | 0 | } else { |
17135 | | // The second half is passed via the stack, without additional alignment. |
17136 | 0 | State.addLoc(CCValAssign::getMem( |
17137 | 0 | ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), |
17138 | 0 | LocVT2, CCValAssign::Full)); |
17139 | 0 | } |
17140 | |
17141 | 0 | return false; |
17142 | 0 | } |
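
For example (illustrative only, assuming the ilp32 ABI on RV32): a fixed uint64_t
argument is split into two XLEN halves before reaching the routine above. If only one
argument GPR remains, the low half takes that register and the high half is passed on
the stack; if no GPRs remain, both halves go to the stack with the alignment handling
shown above.

  #include <cstdint>

  // c0..c6 occupy a0..a6, so only a7 is left for V: its low 32 bits travel in
  // a7 and its high 32 bits on the stack (the first branch of the helper above).
  uint64_t splitArg(int c0, int c1, int c2, int c3, int c4, int c5, int c6,
                    uint64_t V) {
    return V;
  }
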
17143 | | |
17144 | | static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, |
17145 | | std::optional<unsigned> FirstMaskArgument, |
17146 | 0 | CCState &State, const RISCVTargetLowering &TLI) { |
17147 | 0 | const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); |
17148 | 0 | if (RC == &RISCV::VRRegClass) { |
17149 | | // Assign the first mask argument to V0. |
17150 | | // This is an interim calling convention and it may be changed in the |
17151 | | // future. |
17152 | 0 | if (FirstMaskArgument && ValNo == *FirstMaskArgument) |
17153 | 0 | return State.AllocateReg(RISCV::V0); |
17154 | 0 | return State.AllocateReg(ArgVRs); |
17155 | 0 | } |
17156 | 0 | if (RC == &RISCV::VRM2RegClass) |
17157 | 0 | return State.AllocateReg(ArgVRM2s); |
17158 | 0 | if (RC == &RISCV::VRM4RegClass) |
17159 | 0 | return State.AllocateReg(ArgVRM4s); |
17160 | 0 | if (RC == &RISCV::VRM8RegClass) |
17161 | 0 | return State.AllocateReg(ArgVRM8s); |
17162 | 0 | llvm_unreachable("Unhandled register class for ValueType"); |
17163 | 0 | } |
17164 | | |
17165 | | // Implements the RISC-V calling convention. Returns true upon failure. |
17166 | | bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, |
17167 | | MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, |
17168 | | ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, |
17169 | | bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, |
17170 | 195k | std::optional<unsigned> FirstMaskArgument) { |
17171 | 195k | unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); |
17172 | 195k | assert(XLen == 32 || XLen == 64); |
17173 | 195k | MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; |
17174 | | |
17175 | | // Static chain parameter must not be passed in normal argument registers, |
17176 | | // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain.
17177 | 195k | if (ArgFlags.isNest()) { |
17178 | 0 | if (unsigned Reg = State.AllocateReg(RISCV::X7)) { |
17179 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17180 | 0 | return false; |
17181 | 0 | } |
17182 | 0 | } |
17183 | | |
17184 | | // Any return value split into more than two values can't be returned
17185 | | // directly. Vectors are returned via the available vector registers. |
17186 | 195k | if (!LocVT.isVector() && IsRet && ValNo > 1) |
17187 | 0 | return true; |
17188 | | |
17189 | | // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if passing a
17190 | | // variadic argument, or if no F16/F32 argument registers are available. |
17191 | 195k | bool UseGPRForF16_F32 = true; |
17192 | | // UseGPRForF64 is true if targeting a soft-float ABI or an FLEN=32 ABI, if passing a
17193 | | // variadic argument, or if no F64 argument registers are available. |
17194 | 195k | bool UseGPRForF64 = true; |
17195 | | |
17196 | 195k | switch (ABI) { |
17197 | 0 | default: |
17198 | 0 | llvm_unreachable("Unexpected ABI"); |
17199 | 0 | case RISCVABI::ABI_ILP32: |
17200 | 0 | case RISCVABI::ABI_ILP32E: |
17201 | 195k | case RISCVABI::ABI_LP64: |
17202 | 195k | case RISCVABI::ABI_LP64E: |
17203 | 195k | break; |
17204 | 0 | case RISCVABI::ABI_ILP32F: |
17205 | 0 | case RISCVABI::ABI_LP64F: |
17206 | 0 | UseGPRForF16_F32 = !IsFixed; |
17207 | 0 | break; |
17208 | 0 | case RISCVABI::ABI_ILP32D: |
17209 | 0 | case RISCVABI::ABI_LP64D: |
17210 | 0 | UseGPRForF16_F32 = !IsFixed; |
17211 | 0 | UseGPRForF64 = !IsFixed; |
17212 | 0 | break; |
17213 | 195k | } |
17214 | | |
17215 | | // FPR16, FPR32, and FPR64 alias each other. |
17216 | 195k | if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) { |
17217 | 0 | UseGPRForF16_F32 = true; |
17218 | 0 | UseGPRForF64 = true; |
17219 | 0 | } |
17220 | | |
17221 | | // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and |
17222 | | // similar local variables rather than directly checking against the target |
17223 | | // ABI. |
17224 | | |
17225 | 195k | if (UseGPRForF16_F32 && |
17226 | 195k | (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) { |
17227 | 0 | LocVT = XLenVT; |
17228 | 0 | LocInfo = CCValAssign::BCvt; |
17229 | 195k | } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { |
17230 | 0 | LocVT = MVT::i64; |
17231 | 0 | LocInfo = CCValAssign::BCvt; |
17232 | 0 | } |
17233 | | |
17234 | 195k | ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI); |
17235 | | |
17236 | | // If this is a variadic argument, the RISC-V calling convention requires |
17237 | | // that it is assigned an 'even' or 'aligned' register if it has 8-byte |
17238 | | // alignment (RV32) or 16-byte alignment (RV64). An aligned register should |
17239 | | // be used regardless of whether the original argument was split during |
17240 | | // legalisation or not. The argument will not be passed by registers if the |
17241 | | // original type is larger than 2*XLEN, so the register alignment rule does |
17242 | | // not apply. |
17243 | | // TODO: To be compatible with GCC's behaviors, we don't align registers |
17244 | | // currently if we are using ILP32E calling convention. This behavior may be |
17245 | | // changed when RV32E/ILP32E is ratified. |
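  // Illustration (assumed, not from the original source): on LP64, a variadic
  // __int128 has 2*XLEN (16-byte) alignment. If the next free argument GPR is
  // odd-numbered, e.g. a1, that register is skipped and the value is passed in
  // the aligned pair a2/a3 instead.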
17246 | 195k | unsigned TwoXLenInBytes = (2 * XLen) / 8; |
17247 | 195k | if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && |
17248 | 195k | DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && |
17249 | 195k | ABI != RISCVABI::ABI_ILP32E) { |
17250 | 0 | unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); |
17251 | | // Skip 'odd' register if necessary. |
17252 | 0 | if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) |
17253 | 0 | State.AllocateReg(ArgGPRs); |
17254 | 0 | } |
17255 | | |
17256 | 195k | SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); |
17257 | 195k | SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = |
17258 | 195k | State.getPendingArgFlags(); |
17259 | | |
17260 | 195k | assert(PendingLocs.size() == PendingArgFlags.size() && |
17261 | 195k | "PendingLocs and PendingArgFlags out of sync"); |
17262 | | |
17263 | | // Handle passing f64 on RV32D with a soft float ABI or when floating point |
17264 | | // registers are exhausted. |
17265 | 195k | if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { |
17266 | 0 | assert(PendingLocs.empty() && "Can't lower f64 if it is split"); |
17267 | | // Depending on available argument GPRS, f64 may be passed in a pair of |
17268 | | // GPRs, split between a GPR and the stack, or passed completely on the |
17269 | | // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these |
17270 | | // cases. |
17271 | 0 | Register Reg = State.AllocateReg(ArgGPRs); |
17272 | 0 | if (!Reg) { |
17273 | 0 | unsigned StackOffset = State.AllocateStack(8, Align(8)); |
17274 | 0 | State.addLoc( |
17275 | 0 | CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); |
17276 | 0 | return false; |
17277 | 0 | } |
17278 | 0 | LocVT = MVT::i32; |
17279 | 0 | State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17280 | 0 | Register HiReg = State.AllocateReg(ArgGPRs); |
17281 | 0 | if (HiReg) { |
17282 | 0 | State.addLoc( |
17283 | 0 | CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo)); |
17284 | 0 | } else { |
17285 | 0 | unsigned StackOffset = State.AllocateStack(4, Align(4)); |
17286 | 0 | State.addLoc( |
17287 | 0 | CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); |
17288 | 0 | } |
17289 | 0 | return false; |
17290 | 0 | } |
17291 | | |
17292 | | // Fixed-length vectors are located in the corresponding scalable-vector |
17293 | | // container types. |
17294 | 195k | if (ValVT.isFixedLengthVector()) |
17295 | 0 | LocVT = TLI.getContainerForFixedLengthVector(LocVT); |
17296 | | |
17297 | | // Split arguments might be passed indirectly, so keep track of the pending |
17298 | | // values. Split vectors are passed via a mix of registers and indirectly, so |
17299 | | // treat them as we would any other argument. |
17300 | 195k | if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { |
17301 | 0 | LocVT = XLenVT; |
17302 | 0 | LocInfo = CCValAssign::Indirect; |
17303 | 0 | PendingLocs.push_back( |
17304 | 0 | CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); |
17305 | 0 | PendingArgFlags.push_back(ArgFlags); |
17306 | 0 | if (!ArgFlags.isSplitEnd()) { |
17307 | 0 | return false; |
17308 | 0 | } |
17309 | 0 | } |
17310 | | |
17311 | | // If the split argument only had two elements, it should be passed directly |
17312 | | // in registers or on the stack. |
17313 | 195k | if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && |
17314 | 195k | PendingLocs.size() <= 2) { |
17315 | 0 | assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); |
17316 | | // Apply the normal calling convention rules to the first half of the |
17317 | | // split argument. |
17318 | 0 | CCValAssign VA = PendingLocs[0]; |
17319 | 0 | ISD::ArgFlagsTy AF = PendingArgFlags[0]; |
17320 | 0 | PendingLocs.clear(); |
17321 | 0 | PendingArgFlags.clear(); |
17322 | 0 | return CC_RISCVAssign2XLen( |
17323 | 0 | XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags, |
17324 | 0 | ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); |
17325 | 0 | } |
17326 | | |
17327 | | // Allocate to a register if possible, or else a stack slot. |
17328 | 195k | Register Reg; |
17329 | 195k | unsigned StoreSizeBytes = XLen / 8; |
17330 | 195k | Align StackAlign = Align(XLen / 8); |
17331 | | |
17332 | 195k | if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32) |
17333 | 0 | Reg = State.AllocateReg(ArgFPR16s); |
17334 | 195k | else if (ValVT == MVT::f32 && !UseGPRForF16_F32) |
17335 | 0 | Reg = State.AllocateReg(ArgFPR32s); |
17336 | 195k | else if (ValVT == MVT::f64 && !UseGPRForF64) |
17337 | 0 | Reg = State.AllocateReg(ArgFPR64s); |
17338 | 195k | else if (ValVT.isVector()) { |
17339 | 0 | Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI); |
17340 | 0 | if (!Reg) { |
17341 | | // For return values, the vector must be passed fully via registers or |
17342 | | // via the stack. |
17343 | | // FIXME: The proposed vector ABI only mandates v8-v15 for return values, |
17344 | | // but we're using all of them. |
17345 | 0 | if (IsRet) |
17346 | 0 | return true; |
17347 | | // Try using a GPR to pass the address |
17348 | 0 | if ((Reg = State.AllocateReg(ArgGPRs))) { |
17349 | 0 | LocVT = XLenVT; |
17350 | 0 | LocInfo = CCValAssign::Indirect; |
17351 | 0 | } else if (ValVT.isScalableVector()) { |
17352 | 0 | LocVT = XLenVT; |
17353 | 0 | LocInfo = CCValAssign::Indirect; |
17354 | 0 | } else { |
17355 | | // Pass fixed-length vectors on the stack. |
17356 | 0 | LocVT = ValVT; |
17357 | 0 | StoreSizeBytes = ValVT.getStoreSize(); |
17358 | | // Align vectors to their element sizes, being careful for vXi1 |
17359 | | // vectors. |
17360 | 0 | StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); |
17361 | 0 | } |
17362 | 0 | } |
17363 | 195k | } else { |
17364 | 195k | Reg = State.AllocateReg(ArgGPRs); |
17365 | 195k | } |
17366 | | |
17367 | 195k | unsigned StackOffset = |
17368 | 195k | Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); |
17369 | | |
17370 | | // If we reach this point and PendingLocs is non-empty, we must be at the |
17371 | | // end of a split argument that must be passed indirectly. |
17372 | 195k | if (!PendingLocs.empty()) { |
17373 | 0 | assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); |
17374 | 0 | assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); |
17375 | | |
17376 | 0 | for (auto &It : PendingLocs) { |
17377 | 0 | if (Reg) |
17378 | 0 | It.convertToReg(Reg); |
17379 | 0 | else |
17380 | 0 | It.convertToMem(StackOffset); |
17381 | 0 | State.addLoc(It); |
17382 | 0 | } |
17383 | 0 | PendingLocs.clear(); |
17384 | 0 | PendingArgFlags.clear(); |
17385 | 0 | return false; |
17386 | 0 | } |
17387 | | |
17388 | 195k | assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || |
17389 | 195k | (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && |
17390 | 195k | "Expected an XLenVT or vector types at this stage"); |
17391 | | |
17392 | 195k | if (Reg) { |
17393 | 195k | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17394 | 195k | return false; |
17395 | 195k | } |
17396 | | |
17397 | | // When a scalar floating-point value is passed on the stack, no |
17398 | | // bit-conversion is needed. |
17399 | 0 | if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) { |
17400 | 0 | assert(!ValVT.isVector()); |
17401 | 0 | LocVT = ValVT; |
17402 | 0 | LocInfo = CCValAssign::Full; |
17403 | 0 | } |
17404 | 0 | State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); |
17405 | 0 | return false; |
17406 | 195k | } |
17407 | | |
17408 | | template <typename ArgTy> |
17409 | 0 | static std::optional<unsigned> preAssignMask(const ArgTy &Args) { |
17410 | 0 | for (const auto &ArgIdx : enumerate(Args)) { |
17411 | 0 | MVT ArgVT = ArgIdx.value().VT; |
17412 | 0 | if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) |
17413 | 0 | return ArgIdx.index(); |
17414 | 0 | } |
17415 | 0 | return std::nullopt; |
17416 | 0 | }
Unexecuted instantiation: RISCVISelLowering.cpp:std::__1::optional<unsigned int> preAssignMask<llvm::SmallVectorImpl<llvm::ISD::InputArg> >(llvm::SmallVectorImpl<llvm::ISD::InputArg> const&)
Unexecuted instantiation: RISCVISelLowering.cpp:std::__1::optional<unsigned int> preAssignMask<llvm::SmallVectorImpl<llvm::ISD::OutputArg> >(llvm::SmallVectorImpl<llvm::ISD::OutputArg> const&)
17417 | | |
17418 | | void RISCVTargetLowering::analyzeInputArgs( |
17419 | | MachineFunction &MF, CCState &CCInfo, |
17420 | | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
17421 | 48.9k | RISCVCCAssignFn Fn) const { |
17422 | 48.9k | unsigned NumArgs = Ins.size(); |
17423 | 48.9k | FunctionType *FType = MF.getFunction().getFunctionType(); |
17424 | | |
17425 | 48.9k | std::optional<unsigned> FirstMaskArgument; |
17426 | 48.9k | if (Subtarget.hasVInstructions()) |
17427 | 0 | FirstMaskArgument = preAssignMask(Ins); |
17428 | | |
17429 | 103k | for (unsigned i = 0; i != NumArgs; ++i) { |
17430 | 54.7k | MVT ArgVT = Ins[i].VT; |
17431 | 54.7k | ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; |
17432 | | |
17433 | 54.7k | Type *ArgTy = nullptr; |
17434 | 54.7k | if (IsRet) |
17435 | 42.7k | ArgTy = FType->getReturnType(); |
17436 | 12.0k | else if (Ins[i].isOrigArg()) |
17437 | 12.0k | ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); |
17438 | | |
17439 | 54.7k | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
17440 | 54.7k | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, |
17441 | 54.7k | ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, |
17442 | 54.7k | FirstMaskArgument)) { |
17443 | 0 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " |
17444 | 0 | << ArgVT << '\n'); |
17445 | 0 | llvm_unreachable(nullptr); |
17446 | 0 | } |
17447 | 54.7k | } |
17448 | 48.9k | } |
17449 | | |
17450 | | void RISCVTargetLowering::analyzeOutputArgs( |
17451 | | MachineFunction &MF, CCState &CCInfo, |
17452 | | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
17453 | 48.9k | CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { |
17454 | 48.9k | unsigned NumArgs = Outs.size(); |
17455 | | |
17456 | 48.9k | std::optional<unsigned> FirstMaskArgument; |
17457 | 48.9k | if (Subtarget.hasVInstructions()) |
17458 | 0 | FirstMaskArgument = preAssignMask(Outs); |
17459 | | |
17460 | 140k | for (unsigned i = 0; i != NumArgs; i++) { |
17461 | 91.7k | MVT ArgVT = Outs[i].VT; |
17462 | 91.7k | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
17463 | 91.7k | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
17464 | | |
17465 | 91.7k | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
17466 | 91.7k | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, |
17467 | 91.7k | ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, |
17468 | 91.7k | FirstMaskArgument)) { |
17469 | 0 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " |
17470 | 0 | << ArgVT << "\n"); |
17471 | 0 | llvm_unreachable(nullptr); |
17472 | 0 | } |
17473 | 91.7k | } |
17474 | 48.9k | } |
17475 | | |
17476 | | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
17477 | | // values. |
17478 | | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
17479 | | const CCValAssign &VA, const SDLoc &DL, |
17480 | 54.7k | const RISCVSubtarget &Subtarget) { |
17481 | 54.7k | switch (VA.getLocInfo()) { |
17482 | 0 | default: |
17483 | 0 | llvm_unreachable("Unexpected CCValAssign::LocInfo"); |
17484 | 54.7k | case CCValAssign::Full: |
17485 | 54.7k | if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) |
17486 | 0 | Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); |
17487 | 54.7k | break; |
17488 | 0 | case CCValAssign::BCvt: |
17489 | 0 | if (VA.getLocVT().isInteger() && |
17490 | 0 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { |
17491 | 0 | Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val); |
17492 | 0 | } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { |
17493 | 0 | if (RV64LegalI32) { |
17494 | 0 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val); |
17495 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); |
17496 | 0 | } else { |
17497 | 0 | Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); |
17498 | 0 | } |
17499 | 0 | } else { |
17500 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); |
17501 | 0 | } |
17502 | 0 | break; |
17503 | 54.7k | } |
17504 | 54.7k | return Val; |
17505 | 54.7k | } |
17506 | | |
17507 | | // The caller is responsible for loading the full value if the argument is |
17508 | | // passed with CCValAssign::Indirect. |
17509 | | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
17510 | | const CCValAssign &VA, const SDLoc &DL, |
17511 | | const ISD::InputArg &In, |
17512 | 12.0k | const RISCVTargetLowering &TLI) { |
17513 | 12.0k | MachineFunction &MF = DAG.getMachineFunction(); |
17514 | 12.0k | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
17515 | 12.0k | EVT LocVT = VA.getLocVT(); |
17516 | 12.0k | SDValue Val; |
17517 | 12.0k | const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); |
17518 | 12.0k | Register VReg = RegInfo.createVirtualRegister(RC); |
17519 | 12.0k | RegInfo.addLiveIn(VA.getLocReg(), VReg); |
17520 | 12.0k | Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); |
17521 | | |
17522 | | // If input is sign extended from 32 bits, note it for the SExtWRemoval pass. |
17523 | 12.0k | if (In.isOrigArg()) { |
17524 | 12.0k | Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex()); |
17525 | 12.0k | if (OrigArg->getType()->isIntegerTy()) { |
17526 | 7.44k | unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); |
17527 | | // An input zero extended from fewer than 32 bits can also be considered sign extended.
17528 | 7.44k | if ((BitWidth <= 32 && In.Flags.isSExt()) || |
17529 | 7.44k | (BitWidth < 32 && In.Flags.isZExt())) { |
17530 | 0 | RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); |
17531 | 0 | RVFI->addSExt32Register(VReg); |
17532 | 0 | } |
17533 | 7.44k | } |
17534 | 12.0k | } |
17535 | | |
17536 | 12.0k | if (VA.getLocInfo() == CCValAssign::Indirect) |
17537 | 0 | return Val; |
17538 | | |
17539 | 12.0k | return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); |
17540 | 12.0k | } |
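
A hedged example of that bookkeeping (assuming a 64-bit target where the frontend
marks narrow integer arguments signext): an int parameter already arrives
sign-extended in its GPR, so recording its vreg lets later redundant sign-extension
instructions be removed.

  // On RV64, X is passed sign-extended to 64 bits, so widening it on return
  // should need no extra sext.w once the register is noted via addSExt32Register.
  long widenToLong(int X) { return X; }
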
17541 | | |
17542 | | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
17543 | | const CCValAssign &VA, const SDLoc &DL, |
17544 | 91.7k | const RISCVSubtarget &Subtarget) { |
17545 | 91.7k | EVT LocVT = VA.getLocVT(); |
17546 | | |
17547 | 91.7k | switch (VA.getLocInfo()) { |
17548 | 0 | default: |
17549 | 0 | llvm_unreachable("Unexpected CCValAssign::LocInfo"); |
17550 | 91.7k | case CCValAssign::Full: |
17551 | 91.7k | if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) |
17552 | 0 | Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); |
17553 | 91.7k | break; |
17554 | 0 | case CCValAssign::BCvt: |
17555 | 0 | if (LocVT.isInteger() && |
17556 | 0 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { |
17557 | 0 | Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val); |
17558 | 0 | } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) { |
17559 | 0 | if (RV64LegalI32) { |
17560 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); |
17561 | 0 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val); |
17562 | 0 | } else { |
17563 | 0 | Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); |
17564 | 0 | } |
17565 | 0 | } else { |
17566 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); |
17567 | 0 | } |
17568 | 0 | break; |
17569 | 91.7k | } |
17570 | 91.7k | return Val; |
17571 | 91.7k | } |
17572 | | |
17573 | | // The caller is responsible for loading the full value if the argument is |
17574 | | // passed with CCValAssign::Indirect. |
17575 | | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
17576 | 0 | const CCValAssign &VA, const SDLoc &DL) { |
17577 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
17578 | 0 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
17579 | 0 | EVT LocVT = VA.getLocVT(); |
17580 | 0 | EVT ValVT = VA.getValVT(); |
17581 | 0 | EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); |
17582 | 0 | if (ValVT.isScalableVector()) { |
17583 | | // When the value is a scalable vector, we save the pointer which points to |
17584 | | // the scalable vector value on the stack. The ValVT will be the pointer
17585 | | // type, instead of the scalable vector type. |
17586 | 0 | ValVT = LocVT; |
17587 | 0 | } |
17588 | 0 | int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), |
17589 | 0 | /*IsImmutable=*/true); |
17590 | 0 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
17591 | 0 | SDValue Val; |
17592 | |
17593 | 0 | ISD::LoadExtType ExtType; |
17594 | 0 | switch (VA.getLocInfo()) { |
17595 | 0 | default: |
17596 | 0 | llvm_unreachable("Unexpected CCValAssign::LocInfo"); |
17597 | 0 | case CCValAssign::Full: |
17598 | 0 | case CCValAssign::Indirect: |
17599 | 0 | case CCValAssign::BCvt: |
17600 | 0 | ExtType = ISD::NON_EXTLOAD; |
17601 | 0 | break; |
17602 | 0 | } |
17603 | 0 | Val = DAG.getExtLoad( |
17604 | 0 | ExtType, DL, LocVT, Chain, FIN, |
17605 | 0 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); |
17606 | 0 | return Val; |
17607 | 0 | } |
17608 | | |
17609 | | static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, |
17610 | | const CCValAssign &VA, |
17611 | | const CCValAssign &HiVA, |
17612 | 0 | const SDLoc &DL) { |
17613 | 0 | assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && |
17614 | 0 | "Unexpected VA"); |
17615 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
17616 | 0 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
17617 | 0 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
17618 | |
17619 | 0 | assert(VA.isRegLoc() && "Expected register VA assignment"); |
17620 | | |
17621 | 0 | Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); |
17622 | 0 | RegInfo.addLiveIn(VA.getLocReg(), LoVReg); |
17623 | 0 | SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); |
17624 | 0 | SDValue Hi; |
17625 | 0 | if (HiVA.isMemLoc()) { |
17626 | | // Second half of f64 is passed on the stack. |
17627 | 0 | int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(), |
17628 | 0 | /*IsImmutable=*/true); |
17629 | 0 | SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); |
17630 | 0 | Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, |
17631 | 0 | MachinePointerInfo::getFixedStack(MF, FI)); |
17632 | 0 | } else { |
17633 | | // Second half of f64 is passed in another GPR. |
17634 | 0 | Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); |
17635 | 0 | RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg); |
17636 | 0 | Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); |
17637 | 0 | } |
17638 | 0 | return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); |
17639 | 0 | } |
17640 | | |
17641 | | // FastCC gives less than a 1% performance improvement on some particular
17642 | | // benchmarks, but it may theoretically benefit some other cases.
17643 | | bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, |
17644 | | unsigned ValNo, MVT ValVT, MVT LocVT, |
17645 | | CCValAssign::LocInfo LocInfo, |
17646 | | ISD::ArgFlagsTy ArgFlags, CCState &State, |
17647 | | bool IsFixed, bool IsRet, Type *OrigTy, |
17648 | | const RISCVTargetLowering &TLI, |
17649 | 0 | std::optional<unsigned> FirstMaskArgument) { |
17650 | 0 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
17651 | 0 | if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { |
17652 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17653 | 0 | return false; |
17654 | 0 | } |
17655 | 0 | } |
17656 | | |
17657 | 0 | const RISCVSubtarget &Subtarget = TLI.getSubtarget(); |
17658 | |
17659 | 0 | if (LocVT == MVT::f16 && |
17660 | 0 | (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) { |
17661 | 0 | static const MCPhysReg FPR16List[] = { |
17662 | 0 | RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, |
17663 | 0 | RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, |
17664 | 0 | RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, |
17665 | 0 | RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; |
17666 | 0 | if (unsigned Reg = State.AllocateReg(FPR16List)) { |
17667 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17668 | 0 | return false; |
17669 | 0 | } |
17670 | 0 | } |
17671 | | |
17672 | 0 | if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { |
17673 | 0 | static const MCPhysReg FPR32List[] = { |
17674 | 0 | RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, |
17675 | 0 | RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, |
17676 | 0 | RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, |
17677 | 0 | RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; |
17678 | 0 | if (unsigned Reg = State.AllocateReg(FPR32List)) { |
17679 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17680 | 0 | return false; |
17681 | 0 | } |
17682 | 0 | } |
17683 | | |
17684 | 0 | if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { |
17685 | 0 | static const MCPhysReg FPR64List[] = { |
17686 | 0 | RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, |
17687 | 0 | RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, |
17688 | 0 | RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, |
17689 | 0 | RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; |
17690 | 0 | if (unsigned Reg = State.AllocateReg(FPR64List)) { |
17691 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17692 | 0 | return false; |
17693 | 0 | } |
17694 | 0 | } |
17695 | | |
17696 | | // Check if there is an available GPR before hitting the stack. |
17697 | 0 | if ((LocVT == MVT::f16 && |
17698 | 0 | (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) || |
17699 | 0 | (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || |
17700 | 0 | (LocVT == MVT::f64 && Subtarget.is64Bit() && |
17701 | 0 | Subtarget.hasStdExtZdinx())) { |
17702 | 0 | if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { |
17703 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17704 | 0 | return false; |
17705 | 0 | } |
17706 | 0 | } |
17707 | | |
17708 | 0 | if (LocVT == MVT::f16) { |
17709 | 0 | unsigned Offset2 = State.AllocateStack(2, Align(2)); |
17710 | 0 | State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo)); |
17711 | 0 | return false; |
17712 | 0 | } |
17713 | | |
17714 | 0 | if (LocVT == MVT::i32 || LocVT == MVT::f32) { |
17715 | 0 | unsigned Offset4 = State.AllocateStack(4, Align(4)); |
17716 | 0 | State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); |
17717 | 0 | return false; |
17718 | 0 | } |
17719 | | |
17720 | 0 | if (LocVT == MVT::i64 || LocVT == MVT::f64) { |
17721 | 0 | unsigned Offset5 = State.AllocateStack(8, Align(8)); |
17722 | 0 | State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); |
17723 | 0 | return false; |
17724 | 0 | } |
17725 | | |
17726 | 0 | if (LocVT.isVector()) { |
17727 | 0 | if (unsigned Reg = |
17728 | 0 | allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) { |
17729 | | // Fixed-length vectors are located in the corresponding scalable-vector |
17730 | | // container types. |
17731 | 0 | if (ValVT.isFixedLengthVector()) |
17732 | 0 | LocVT = TLI.getContainerForFixedLengthVector(LocVT); |
17733 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17734 | 0 | } else { |
17735 | | // Try to pass the address via a "fast" GPR.
17736 | 0 | if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { |
17737 | 0 | LocInfo = CCValAssign::Indirect; |
17738 | 0 | LocVT = TLI.getSubtarget().getXLenVT(); |
17739 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); |
17740 | 0 | } else if (ValVT.isFixedLengthVector()) { |
17741 | 0 | auto StackAlign = |
17742 | 0 | MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); |
17743 | 0 | unsigned StackOffset = |
17744 | 0 | State.AllocateStack(ValVT.getStoreSize(), StackAlign); |
17745 | 0 | State.addLoc( |
17746 | 0 | CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); |
17747 | 0 | } else { |
17748 | | // Can't pass scalable vectors on the stack. |
17749 | 0 | return true; |
17750 | 0 | } |
17751 | 0 | } |
17752 | | |
17753 | 0 | return false; |
17754 | 0 | } |
17755 | | |
17756 | 0 | return true; // CC didn't match. |
17757 | 0 | } |
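
CC_RISCV_FastCC above tries a register from a preferred list for each value type and only then falls back to a naturally aligned stack slot. A simplified standalone model of that fallback order (assumed names, not the real CCState bookkeeping):

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

struct SimpleCCState {
  std::vector<std::string> FreeRegs; // e.g. "a0".."a7" followed by temporaries
  uint64_t NextStackOffset = 0;

  // Hand out the next free register from the preferred list, if any.
  std::optional<std::string> allocateReg() {
    if (FreeRegs.empty())
      return std::nullopt;
    std::string Reg = FreeRegs.front();
    FreeRegs.erase(FreeRegs.begin());
    return Reg;
  }

  // Otherwise take a naturally aligned slot, as the f16/f32/f64 and i32/i64
  // stack cases above do with AllocateStack(Size, Align(Size)).
  uint64_t allocateStack(uint64_t Size, uint64_t Align) {
    NextStackOffset = (NextStackOffset + Align - 1) & ~(Align - 1);
    uint64_t Offset = NextStackOffset;
    NextStackOffset += Size;
    return Offset;
  }
};
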
17758 | | |
17759 | | bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
17760 | | CCValAssign::LocInfo LocInfo, |
17761 | 0 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
17762 | 0 | if (ArgFlags.isNest()) { |
17763 | 0 | report_fatal_error( |
17764 | 0 | "Attribute 'nest' is not supported in GHC calling convention"); |
17765 | 0 | } |
17766 | |
17767 | 0 | static const MCPhysReg GPRList[] = { |
17768 | 0 | RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, |
17769 | 0 | RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; |
17770 | |
17771 | 0 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
17772 | | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim |
17773 | | // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 |
17774 | 0 | if (unsigned Reg = State.AllocateReg(GPRList)) { |
17775 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17776 | 0 | return false; |
17777 | 0 | } |
17778 | 0 | } |
17779 | | |
17780 | 0 | const RISCVSubtarget &Subtarget = |
17781 | 0 | State.getMachineFunction().getSubtarget<RISCVSubtarget>(); |
17782 | |
17783 | 0 | if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) { |
17784 | | // Pass in STG registers: F1, ..., F6 |
17785 | | // fs0 ... fs5 |
17786 | 0 | static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, |
17787 | 0 | RISCV::F18_F, RISCV::F19_F, |
17788 | 0 | RISCV::F20_F, RISCV::F21_F}; |
17789 | 0 | if (unsigned Reg = State.AllocateReg(FPR32List)) { |
17790 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17791 | 0 | return false; |
17792 | 0 | } |
17793 | 0 | } |
17794 | | |
17795 | 0 | if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) { |
17796 | | // Pass in STG registers: D1, ..., D6 |
17797 | | // fs6 ... fs11 |
17798 | 0 | static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, |
17799 | 0 | RISCV::F24_D, RISCV::F25_D, |
17800 | 0 | RISCV::F26_D, RISCV::F27_D}; |
17801 | 0 | if (unsigned Reg = State.AllocateReg(FPR64List)) { |
17802 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17803 | 0 | return false; |
17804 | 0 | } |
17805 | 0 | } |
17806 | | |
17807 | 0 | if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || |
17808 | 0 | (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && |
17809 | 0 | Subtarget.is64Bit())) { |
17810 | 0 | if (unsigned Reg = State.AllocateReg(GPRList)) { |
17811 | 0 | State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); |
17812 | 0 | return false; |
17813 | 0 | } |
17814 | 0 | } |
17815 | | |
17816 | 0 | report_fatal_error("No registers left in GHC calling convention"); |
17817 | 0 | return true; |
17818 | 0 | } |
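
The comments in CC_RISCV_GHC spell out how GHC's STG virtual registers are pinned to RISC-V callee-saved registers. Collected into one table for reference (illustrative only, taken from the comments above):

// STG register -> RISC-V register, per the GHC calling convention above.
struct STGMapping { const char *STGReg; const char *RISCVReg; };
static const STGMapping GHCRegMap[] = {
    {"Base", "s1"}, {"Sp", "s2"},  {"Hp", "s3"},  {"R1", "s4"},
    {"R2", "s5"},   {"R3", "s6"},  {"R4", "s7"},  {"R5", "s8"},
    {"R6", "s9"},   {"R7", "s10"}, {"SpLim", "s11"},
    // Floats F1..F6 use fs0..fs5; doubles D1..D6 use fs6..fs11.
};
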
17819 | | |
17820 | | // Transform physical registers into virtual registers. |
17821 | | SDValue RISCVTargetLowering::LowerFormalArguments( |
17822 | | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
17823 | | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
17824 | 6.24k | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
17825 | | |
17826 | 6.24k | MachineFunction &MF = DAG.getMachineFunction(); |
17827 | | |
17828 | 6.24k | switch (CallConv) { |
17829 | 0 | default: |
17830 | 0 | report_fatal_error("Unsupported calling convention"); |
17831 | 6.24k | case CallingConv::C: |
17832 | 6.24k | case CallingConv::Fast: |
17833 | 6.24k | case CallingConv::SPIR_KERNEL: |
17834 | 6.24k | case CallingConv::GRAAL: |
17835 | 6.24k | break; |
17836 | 0 | case CallingConv::GHC: |
17837 | 0 | if (Subtarget.isRVE()) |
17838 | 0 | report_fatal_error("GHC calling convention is not supported on RVE!"); |
17839 | 0 | if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) |
17840 | 0 | report_fatal_error("GHC calling convention requires the (Zfinx/F) and " |
17841 | 0 | "(Zdinx/D) instruction set extensions"); |
17842 | 6.24k | } |
17843 | | |
17844 | 6.24k | const Function &Func = MF.getFunction(); |
17845 | 6.24k | if (Func.hasFnAttribute("interrupt")) { |
17846 | 0 | if (!Func.arg_empty()) |
17847 | 0 | report_fatal_error( |
17848 | 0 | "Functions with the interrupt attribute cannot have arguments!"); |
17849 | |
17850 | 0 | StringRef Kind = |
17851 | 0 | MF.getFunction().getFnAttribute("interrupt").getValueAsString(); |
17852 | |
17853 | 0 | if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) |
17854 | 0 | report_fatal_error( |
17855 | 0 | "Function interrupt attribute argument not supported!"); |
17856 | 0 | } |
17857 | | |
17858 | 6.24k | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
17859 | 6.24k | MVT XLenVT = Subtarget.getXLenVT(); |
17860 | 6.24k | unsigned XLenInBytes = Subtarget.getXLen() / 8; |
17861 | | // Used with varargs to accumulate store chains.
17862 | 6.24k | std::vector<SDValue> OutChains; |
17863 | | |
17864 | | // Assign locations to all of the incoming arguments. |
17865 | 6.24k | SmallVector<CCValAssign, 16> ArgLocs; |
17866 | 6.24k | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
17867 | | |
17868 | 6.24k | if (CallConv == CallingConv::GHC) |
17869 | 0 | CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC); |
17870 | 6.24k | else |
17871 | 6.24k | analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, |
17872 | 6.24k | CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC |
17873 | 6.24k | : RISCV::CC_RISCV); |
17874 | | |
17875 | 18.2k | for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { |
17876 | 12.0k | CCValAssign &VA = ArgLocs[i]; |
17877 | 12.0k | SDValue ArgValue; |
17878 | | // Passing f64 on RV32D with a soft float ABI must be handled as a special |
17879 | | // case. |
17880 | 12.0k | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
17881 | 0 | assert(VA.needsCustom()); |
17882 | 0 | ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL); |
17883 | 12.0k | } else if (VA.isRegLoc()) |
17884 | 12.0k | ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this); |
17885 | 0 | else |
17886 | 0 | ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); |
17887 | | |
17888 | 12.0k | if (VA.getLocInfo() == CCValAssign::Indirect) { |
17889 | | // If the original argument was split and passed by reference (e.g. i128 |
17890 | | // on RV32), we need to load all parts of it here (using the same |
17891 | | // address). Vectors may be partly split to registers and partly to the |
17892 | | // stack, in which case the base address is partly offset and subsequent |
17893 | | // stores are relative to that. |
17894 | 0 | InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, |
17895 | 0 | MachinePointerInfo())); |
17896 | 0 | unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; |
17897 | 0 | unsigned ArgPartOffset = Ins[InsIdx].PartOffset; |
17898 | 0 | assert(VA.getValVT().isVector() || ArgPartOffset == 0); |
17899 | 0 | while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { |
17900 | 0 | CCValAssign &PartVA = ArgLocs[i + 1]; |
17901 | 0 | unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; |
17902 | 0 | SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); |
17903 | 0 | if (PartVA.getValVT().isScalableVector()) |
17904 | 0 | Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); |
17905 | 0 | SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); |
17906 | 0 | InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, |
17907 | 0 | MachinePointerInfo())); |
17908 | 0 | ++i; |
17909 | 0 | ++InsIdx; |
17910 | 0 | } |
17911 | 0 | continue; |
17912 | 0 | } |
17913 | 12.0k | InVals.push_back(ArgValue); |
17914 | 12.0k | } |
17915 | | |
17916 | 6.24k | if (any_of(ArgLocs, |
17917 | 12.0k | [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) |
17918 | 0 | MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); |
17919 | | |
17920 | 6.24k | if (IsVarArg) { |
17921 | 0 | ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI()); |
17922 | 0 | unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); |
17923 | 0 | const TargetRegisterClass *RC = &RISCV::GPRRegClass; |
17924 | 0 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
17925 | 0 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
17926 | 0 | RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); |
17927 | | |
17928 | | // Size of the vararg save area. For now, the varargs save area is either |
17929 | | // zero or large enough to hold a0-a7. |
17930 | 0 | int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); |
17931 | 0 | int FI; |
17932 | | |
17933 | | // If all registers are allocated, then all varargs must be passed on the |
17934 | | // stack and we don't need to save any argregs. |
17935 | 0 | if (VarArgsSaveSize == 0) { |
17936 | 0 | int VaArgOffset = CCInfo.getStackSize(); |
17937 | 0 | FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); |
17938 | 0 | } else { |
17939 | 0 | int VaArgOffset = -VarArgsSaveSize; |
17940 | 0 | FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true); |
17941 | | |
17942 | | // If saving an odd number of registers, create an extra stack slot to
17943 | | // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
17944 | | // offsets to even-numbered registers remain 2*XLEN-aligned.
17945 | 0 | if (Idx % 2) { |
17946 | 0 | MFI.CreateFixedObject( |
17947 | 0 | XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true); |
17948 | 0 | VarArgsSaveSize += XLenInBytes; |
17949 | 0 | } |
17950 | |
17951 | 0 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
17952 | | |
17953 | | // Copy the integer registers that may have been used for passing varargs |
17954 | | // to the vararg save area. |
17955 | 0 | for (unsigned I = Idx; I < ArgRegs.size(); ++I) { |
17956 | 0 | const Register Reg = RegInfo.createVirtualRegister(RC); |
17957 | 0 | RegInfo.addLiveIn(ArgRegs[I], Reg); |
17958 | 0 | SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); |
17959 | 0 | SDValue Store = DAG.getStore( |
17960 | 0 | Chain, DL, ArgValue, FIN, |
17961 | 0 | MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes)); |
17962 | 0 | OutChains.push_back(Store); |
17963 | 0 | FIN = |
17964 | 0 | DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL); |
17965 | 0 | } |
17966 | 0 | } |
17967 | | |
17968 | | // Record the frame index of the first variable argument,
17969 | | // which is needed by VASTART.
17970 | 0 | RVFI->setVarArgsFrameIndex(FI); |
17971 | 0 | RVFI->setVarArgsSaveSize(VarArgsSaveSize); |
17972 | 0 | } |
17973 | | |
17974 | | // All stores are grouped in one node to allow the matching between |
17975 | | // the size of Ins and InVals. This only happens for vararg functions. |
17976 | 6.24k | if (!OutChains.empty()) { |
17977 | 0 | OutChains.push_back(Chain); |
17978 | 0 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); |
17979 | 0 | } |
17980 | | |
17981 | 6.24k | return Chain; |
17982 | 6.24k | } |
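
For varargs, LowerFormalArguments sizes the register save area from the number of still-unallocated argument GPRs and pads it when an odd count is saved. A small standalone sketch of that arithmetic (assumed names, illustrative only):

#include <cassert>

// VarArgsSaveSize as computed above: unallocated argument GPRs are spilled
// below the frame, plus one XLEN-sized pad slot when an odd number of
// registers is saved so the area stays 2*XLEN-aligned.
static int varArgsSaveSize(int XLenInBytes, int NumArgGPRs, int FirstUnalloc) {
  assert(FirstUnalloc <= NumArgGPRs && "invalid unallocated index");
  int Size = XLenInBytes * (NumArgGPRs - FirstUnalloc);
  if (Size != 0 && (FirstUnalloc % 2))
    Size += XLenInBytes;
  return Size;
}
// e.g. RV64 (8-byte XLEN), 8 argument GPRs, 3 used by fixed arguments:
// 5 registers saved plus one pad slot = 48 bytes.
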
17983 | | |
17984 | | /// isEligibleForTailCallOptimization - Check whether the call is eligible |
17985 | | /// for tail call optimization. |
17986 | | /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. |
17987 | | bool RISCVTargetLowering::isEligibleForTailCallOptimization( |
17988 | | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
17989 | 41 | const SmallVector<CCValAssign, 16> &ArgLocs) const { |
17990 | | |
17991 | 41 | auto CalleeCC = CLI.CallConv; |
17992 | 41 | auto &Outs = CLI.Outs; |
17993 | 41 | auto &Caller = MF.getFunction(); |
17994 | 41 | auto CallerCC = Caller.getCallingConv(); |
17995 | | |
17996 | | // Exception-handling functions need a special set of instructions to |
17997 | | // indicate a return to the hardware. Tail-calling another function would |
17998 | | // probably break this. |
17999 | | // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This |
18000 | | // should be expanded as new function attributes are introduced. |
18001 | 41 | if (Caller.hasFnAttribute("interrupt")) |
18002 | 0 | return false; |
18003 | | |
18004 | | // Do not tail call opt if the stack is used to pass parameters. |
18005 | 41 | if (CCInfo.getStackSize() != 0) |
18006 | 0 | return false; |
18007 | | |
18008 | | // Do not tail call opt if any parameters need to be passed indirectly. |
18009 | | // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are |
18010 | | // passed indirectly. So the address of the value will be passed in a |
18011 | | // register, or if not available, then the address is put on the stack. In |
18012 | | // order to pass indirectly, space on the stack often needs to be allocated |
18014 | | // in order to store the value. In this case the CCInfo.getStackSize()
18015 | | // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
18015 | | // are passed CCValAssign::Indirect. |
18016 | 41 | for (auto &VA : ArgLocs) |
18017 | 82 | if (VA.getLocInfo() == CCValAssign::Indirect) |
18018 | 0 | return false; |
18019 | | |
18020 | | // Do not tail call opt if either caller or callee uses struct return |
18021 | | // semantics. |
18022 | 41 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
18023 | 41 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
18024 | 41 | if (IsCallerStructRet || IsCalleeStructRet) |
18025 | 0 | return false; |
18026 | | |
18027 | | // The callee has to preserve all registers the caller needs to preserve. |
18028 | 41 | const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
18029 | 41 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
18030 | 41 | if (CalleeCC != CallerCC) { |
18031 | 0 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
18032 | 0 | if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) |
18033 | 0 | return false; |
18034 | 0 | } |
18035 | | |
18036 | | // Byval parameters hand the function a pointer directly into the stack area |
18037 | | // we want to reuse during a tail call. Working around this *is* possible |
18038 | | // but less efficient and uglier in LowerCall. |
18039 | 41 | for (auto &Arg : Outs) |
18040 | 82 | if (Arg.Flags.isByVal()) |
18041 | 0 | return false; |
18042 | | |
18043 | 41 | return true; |
18044 | 41 | } |
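
As a hedged source-level illustration of the checks above (whether a tail call is actually emitted still depends on optimization and the rest of lowering):

struct Big { long Data[8]; }; // returned through a hidden sret pointer
Big produce();
int identity(int);

Big wrap() { return produce(); }           // callee uses struct return: rejected
int forward(int X) { return identity(X); } // no stack args/sret/byval: eligible
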
18045 | | |
18046 | 0 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
18047 | 0 | return DAG.getDataLayout().getPrefTypeAlign( |
18048 | 0 | VT.getTypeForEVT(*DAG.getContext())); |
18049 | 0 | } |
18050 | | |
18051 | | // Lower a call to a callseq_start + CALL + callseq_end chain, and add input |
18052 | | // and output parameter nodes. |
18053 | | SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, |
18054 | 42.7k | SmallVectorImpl<SDValue> &InVals) const { |
18055 | 42.7k | SelectionDAG &DAG = CLI.DAG; |
18056 | 42.7k | SDLoc &DL = CLI.DL; |
18057 | 42.7k | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
18058 | 42.7k | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
18059 | 42.7k | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
18060 | 42.7k | SDValue Chain = CLI.Chain; |
18061 | 42.7k | SDValue Callee = CLI.Callee; |
18062 | 42.7k | bool &IsTailCall = CLI.IsTailCall; |
18063 | 42.7k | CallingConv::ID CallConv = CLI.CallConv; |
18064 | 42.7k | bool IsVarArg = CLI.IsVarArg; |
18065 | 42.7k | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
18066 | 42.7k | MVT XLenVT = Subtarget.getXLenVT(); |
18067 | | |
18068 | 42.7k | MachineFunction &MF = DAG.getMachineFunction(); |
18069 | | |
18070 | | // Analyze the operands of the call, assigning locations to each operand. |
18071 | 42.7k | SmallVector<CCValAssign, 16> ArgLocs; |
18072 | 42.7k | CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
18073 | | |
18074 | 42.7k | if (CallConv == CallingConv::GHC) { |
18075 | 0 | if (Subtarget.isRVE()) |
18076 | 0 | report_fatal_error("GHC calling convention is not supported on RVE!"); |
18077 | 0 | ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); |
18078 | 0 | } else |
18079 | 42.7k | analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, |
18080 | 42.7k | CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC |
18081 | 42.7k | : RISCV::CC_RISCV); |
18082 | | |
18083 | | // Check if it's really possible to do a tail call. |
18084 | 42.7k | if (IsTailCall) |
18085 | 41 | IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); |
18086 | | |
18087 | 42.7k | if (IsTailCall) |
18088 | 41 | ++NumTailCalls; |
18089 | 42.7k | else if (CLI.CB && CLI.CB->isMustTailCall()) |
18090 | 0 | report_fatal_error("failed to perform tail call elimination on a call " |
18091 | 0 | "site marked musttail"); |
18092 | | |
18093 | | // Get a count of how many bytes are to be pushed on the stack. |
18094 | 42.7k | unsigned NumBytes = ArgCCInfo.getStackSize(); |
18095 | | |
18096 | | // Create local copies for byval args |
18097 | 42.7k | SmallVector<SDValue, 8> ByValArgs; |
18098 | 128k | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
18099 | 85.5k | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
18100 | 85.5k | if (!Flags.isByVal()) |
18101 | 85.5k | continue; |
18102 | | |
18103 | 0 | SDValue Arg = OutVals[i]; |
18104 | 0 | unsigned Size = Flags.getByValSize(); |
18105 | 0 | Align Alignment = Flags.getNonZeroByValAlign(); |
18106 | |
18107 | 0 | int FI = |
18108 | 0 | MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); |
18109 | 0 | SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); |
18110 | 0 | SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); |
18111 | |
18112 | 0 | Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, |
18113 | 0 | /*IsVolatile=*/false, |
18114 | 0 | /*AlwaysInline=*/false, IsTailCall, |
18115 | 0 | MachinePointerInfo(), MachinePointerInfo()); |
18116 | 0 | ByValArgs.push_back(FIPtr); |
18117 | 0 | } |
18118 | | |
18119 | 42.7k | if (!IsTailCall) |
18120 | 42.7k | Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); |
18121 | | |
18122 | | // Copy argument values to their designated locations. |
18123 | 42.7k | SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; |
18124 | 42.7k | SmallVector<SDValue, 8> MemOpChains; |
18125 | 42.7k | SDValue StackPtr; |
18126 | 128k | for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; |
18127 | 85.5k | ++i, ++OutIdx) { |
18128 | 85.5k | CCValAssign &VA = ArgLocs[i]; |
18129 | 85.5k | SDValue ArgValue = OutVals[OutIdx]; |
18130 | 85.5k | ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; |
18131 | | |
18132 | | // Handle passing f64 on RV32D with a soft float ABI as a special case. |
18133 | 85.5k | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
18134 | 0 | assert(VA.isRegLoc() && "Expected register VA assignment"); |
18135 | 0 | assert(VA.needsCustom()); |
18136 | 0 | SDValue SplitF64 = DAG.getNode( |
18137 | 0 | RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); |
18138 | 0 | SDValue Lo = SplitF64.getValue(0); |
18139 | 0 | SDValue Hi = SplitF64.getValue(1); |
18140 | |
18141 | 0 | Register RegLo = VA.getLocReg(); |
18142 | 0 | RegsToPass.push_back(std::make_pair(RegLo, Lo)); |
18143 | | |
18144 | | // Get the CCValAssign for the Hi part. |
18145 | 0 | CCValAssign &HiVA = ArgLocs[++i]; |
18146 | |
18147 | 0 | if (HiVA.isMemLoc()) { |
18148 | | // Second half of f64 is passed on the stack. |
18149 | 0 | if (!StackPtr.getNode()) |
18150 | 0 | StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); |
18151 | 0 | SDValue Address = |
18152 | 0 | DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, |
18153 | 0 | DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL)); |
18154 | | // Emit the store. |
18155 | 0 | MemOpChains.push_back( |
18156 | 0 | DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo())); |
18157 | 0 | } else { |
18158 | | // Second half of f64 is passed in another GPR. |
18159 | 0 | Register RegHigh = HiVA.getLocReg(); |
18160 | 0 | RegsToPass.push_back(std::make_pair(RegHigh, Hi)); |
18161 | 0 | } |
18162 | 0 | continue; |
18163 | 0 | } |
18164 | | |
18165 | | // Promote the value if needed. |
18166 | | // For now, only handle fully promoted and indirect arguments. |
18167 | 85.5k | if (VA.getLocInfo() == CCValAssign::Indirect) { |
18168 | | // Store the argument in a stack slot and pass its address. |
18169 | 0 | Align StackAlign = |
18170 | 0 | std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG), |
18171 | 0 | getPrefTypeAlign(ArgValue.getValueType(), DAG)); |
18172 | 0 | TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); |
18173 | | // If the original argument was split (e.g. i128), we need |
18174 | | // to store the required parts of it here (and pass just one address). |
18175 | | // Vectors may be partly split to registers and partly to the stack, in |
18176 | | // which case the base address is partly offset and subsequent stores are |
18177 | | // relative to that. |
18178 | 0 | unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; |
18179 | 0 | unsigned ArgPartOffset = Outs[OutIdx].PartOffset; |
18180 | 0 | assert(VA.getValVT().isVector() || ArgPartOffset == 0); |
18181 | | // Calculate the total size to store. We don't have access to what we're |
18182 | | // actually storing other than performing the loop and collecting the |
18183 | | // info. |
18184 | 0 | SmallVector<std::pair<SDValue, SDValue>> Parts; |
18185 | 0 | while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { |
18186 | 0 | SDValue PartValue = OutVals[OutIdx + 1]; |
18187 | 0 | unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; |
18188 | 0 | SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); |
18189 | 0 | EVT PartVT = PartValue.getValueType(); |
18190 | 0 | if (PartVT.isScalableVector()) |
18191 | 0 | Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); |
18192 | 0 | StoredSize += PartVT.getStoreSize(); |
18193 | 0 | StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); |
18194 | 0 | Parts.push_back(std::make_pair(PartValue, Offset)); |
18195 | 0 | ++i; |
18196 | 0 | ++OutIdx; |
18197 | 0 | } |
18198 | 0 | SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); |
18199 | 0 | int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); |
18200 | 0 | MemOpChains.push_back( |
18201 | 0 | DAG.getStore(Chain, DL, ArgValue, SpillSlot, |
18202 | 0 | MachinePointerInfo::getFixedStack(MF, FI))); |
18203 | 0 | for (const auto &Part : Parts) { |
18204 | 0 | SDValue PartValue = Part.first; |
18205 | 0 | SDValue PartOffset = Part.second; |
18206 | 0 | SDValue Address = |
18207 | 0 | DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); |
18208 | 0 | MemOpChains.push_back( |
18209 | 0 | DAG.getStore(Chain, DL, PartValue, Address, |
18210 | 0 | MachinePointerInfo::getFixedStack(MF, FI))); |
18211 | 0 | } |
18212 | 0 | ArgValue = SpillSlot; |
18213 | 85.5k | } else { |
18214 | 85.5k | ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); |
18215 | 85.5k | } |
18216 | | |
18217 | | // Use local copy if it is a byval arg. |
18218 | 85.5k | if (Flags.isByVal()) |
18219 | 0 | ArgValue = ByValArgs[j++]; |
18220 | | |
18221 | 85.5k | if (VA.isRegLoc()) { |
18222 | | // Queue up the argument copies and emit them at the end. |
18223 | 85.5k | RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); |
18224 | 85.5k | } else { |
18225 | 0 | assert(VA.isMemLoc() && "Argument not register or memory"); |
18226 | 0 | assert(!IsTailCall && "Tail call not allowed if stack is used " |
18227 | 0 | "for passing parameters"); |
18228 | | |
18229 | | // Work out the address of the stack slot. |
18230 | 0 | if (!StackPtr.getNode()) |
18231 | 0 | StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); |
18232 | 0 | SDValue Address = |
18233 | 0 | DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, |
18234 | 0 | DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); |
18235 | | |
18236 | | // Emit the store. |
18237 | 0 | MemOpChains.push_back( |
18238 | 0 | DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); |
18239 | 0 | } |
18240 | 85.5k | } |
18241 | | |
18242 | | // Join the stores, which are independent of one another. |
18243 | 42.7k | if (!MemOpChains.empty()) |
18244 | 0 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); |
18245 | | |
18246 | 42.7k | SDValue Glue; |
18247 | | |
18248 | | // Build a sequence of copy-to-reg nodes, chained and glued together. |
18249 | 85.5k | for (auto &Reg : RegsToPass) { |
18250 | 85.5k | Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); |
18251 | 85.5k | Glue = Chain.getValue(1); |
18252 | 85.5k | } |
18253 | | |
18254 | | // Validate that none of the argument registers have been marked as |
18255 | | // reserved; if so, report an error. Do the same for the return address if this
18256 | | // is not a tailcall. |
18257 | 42.7k | validateCCReservedRegs(RegsToPass, MF); |
18258 | 42.7k | if (!IsTailCall && |
18259 | 42.7k | MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) |
18260 | 0 | MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ |
18261 | 0 | MF.getFunction(), |
18262 | 0 | "Return address register required, but has been reserved."}); |
18263 | | |
18264 | | // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a |
18265 | | // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't |
18266 | | // split it and then direct call can be matched by PseudoCALL. |
18267 | 42.7k | if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { |
18268 | 0 | const GlobalValue *GV = S->getGlobal(); |
18269 | 0 | Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL); |
18270 | 42.7k | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { |
18271 | 42.7k | Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL); |
18272 | 42.7k | } |
18273 | | |
18274 | | // The first call operand is the chain and the second is the target address. |
18275 | 42.7k | SmallVector<SDValue, 8> Ops; |
18276 | 42.7k | Ops.push_back(Chain); |
18277 | 42.7k | Ops.push_back(Callee); |
18278 | | |
18279 | | // Add argument registers to the end of the list so that they are |
18280 | | // known live into the call. |
18281 | 42.7k | for (auto &Reg : RegsToPass) |
18282 | 85.5k | Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); |
18283 | | |
18284 | 42.7k | if (!IsTailCall) { |
18285 | | // Add a register mask operand representing the call-preserved registers. |
18286 | 42.7k | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
18287 | 42.7k | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
18288 | 42.7k | assert(Mask && "Missing call preserved mask for calling convention"); |
18289 | 0 | Ops.push_back(DAG.getRegisterMask(Mask)); |
18290 | 42.7k | } |
18291 | | |
18292 | | // Glue the call to the argument copies, if any. |
18293 | 42.7k | if (Glue.getNode()) |
18294 | 42.7k | Ops.push_back(Glue); |
18295 | | |
18296 | 42.7k | assert((!CLI.CFIType || CLI.CB->isIndirectCall()) && |
18297 | 42.7k | "Unexpected CFI type for a direct call"); |
18298 | | |
18299 | | // Emit the call. |
18300 | 0 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
18301 | | |
18302 | 42.7k | if (IsTailCall) { |
18303 | 41 | MF.getFrameInfo().setHasTailCall(); |
18304 | 41 | SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); |
18305 | 41 | if (CLI.CFIType) |
18306 | 0 | Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
18307 | 41 | DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); |
18308 | 41 | return Ret; |
18309 | 41 | } |
18310 | | |
18311 | 42.7k | Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); |
18312 | 42.7k | if (CLI.CFIType) |
18313 | 0 | Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
18314 | 42.7k | DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); |
18315 | 42.7k | Glue = Chain.getValue(1); |
18316 | | |
18317 | | // Mark the end of the call, which is glued to the call itself. |
18318 | 42.7k | Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); |
18319 | 42.7k | Glue = Chain.getValue(1); |
18320 | | |
18321 | | // Assign locations to each value returned by this call. |
18322 | 42.7k | SmallVector<CCValAssign, 16> RVLocs; |
18323 | 42.7k | CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); |
18324 | 42.7k | analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); |
18325 | | |
18326 | | // Copy all of the result registers out of their specified physreg. |
18327 | 85.4k | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { |
18328 | 42.7k | auto &VA = RVLocs[i]; |
18329 | | // Copy the value out |
18330 | 42.7k | SDValue RetValue = |
18331 | 42.7k | DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); |
18332 | | // Glue the RetValue to the end of the call sequence |
18333 | 42.7k | Chain = RetValue.getValue(1); |
18334 | 42.7k | Glue = RetValue.getValue(2); |
18335 | | |
18336 | 42.7k | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
18337 | 0 | assert(VA.needsCustom()); |
18338 | 0 | SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(), |
18339 | 0 | MVT::i32, Glue); |
18340 | 0 | Chain = RetValue2.getValue(1); |
18341 | 0 | Glue = RetValue2.getValue(2); |
18342 | 0 | RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, |
18343 | 0 | RetValue2); |
18344 | 0 | } |
18345 | | |
18346 | 0 | RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); |
18347 | | |
18348 | 42.7k | InVals.push_back(RetValue); |
18349 | 42.7k | } |
18350 | | |
18351 | 42.7k | return Chain; |
18352 | 42.7k | } |
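
The musttail diagnostic above fires when the frontend demands a tail call that the eligibility checks reject. A hedged Clang-level example of a call that must be lowered through the RISCVISD::TAIL path:

int callee(int);
int caller(int X) {
  // [[clang::musttail]] requires the call to be emitted as a tail call;
  // if isEligibleForTailCallOptimization fails, LowerCall reports the
  // "failed to perform tail call elimination" fatal error instead.
  [[clang::musttail]] return callee(X + 1);
}
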
18353 | | |
18354 | | bool RISCVTargetLowering::CanLowerReturn( |
18355 | | CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
18356 | 49.0k | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
18357 | 49.0k | SmallVector<CCValAssign, 16> RVLocs; |
18358 | 49.0k | CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
18359 | | |
18360 | 49.0k | std::optional<unsigned> FirstMaskArgument; |
18361 | 49.0k | if (Subtarget.hasVInstructions()) |
18362 | 0 | FirstMaskArgument = preAssignMask(Outs); |
18363 | | |
18364 | 98.0k | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
18365 | 49.0k | MVT VT = Outs[i].VT; |
18366 | 49.0k | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
18367 | 49.0k | RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); |
18368 | 49.0k | if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, |
18369 | 49.0k | ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, |
18370 | 49.0k | *this, FirstMaskArgument)) |
18371 | 0 | return false; |
18372 | 49.0k | } |
18373 | 49.0k | return true; |
18374 | 49.0k | } |
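
Under the standard RISC-V ABIs (an assumption stated here, not something the code above spells out), return values up to 2*XLEN fit in a0/a1 and CanLowerReturn succeeds; anything larger is demoted to a hidden sret pointer by the caller. For example:

struct Pair { long X, Y; };  // <= 2*XLEN: returned in a0/a1
struct Quad { long A[4]; };  // >  2*XLEN: returned through a hidden pointer
Pair makePair();
Quad makeQuad();
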
18375 | | |
18376 | | SDValue |
18377 | | RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
18378 | | bool IsVarArg, |
18379 | | const SmallVectorImpl<ISD::OutputArg> &Outs, |
18380 | | const SmallVectorImpl<SDValue> &OutVals, |
18381 | 6.20k | const SDLoc &DL, SelectionDAG &DAG) const { |
18382 | 6.20k | MachineFunction &MF = DAG.getMachineFunction(); |
18383 | 6.20k | const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); |
18384 | | |
18385 | | // Stores the assignment of the return value to a location. |
18386 | 6.20k | SmallVector<CCValAssign, 16> RVLocs; |
18387 | | |
18388 | | // Info about the registers and stack slot. |
18389 | 6.20k | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
18390 | 6.20k | *DAG.getContext()); |
18391 | | |
18392 | 6.20k | analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, |
18393 | 6.20k | nullptr, RISCV::CC_RISCV); |
18394 | | |
18395 | 6.20k | if (CallConv == CallingConv::GHC && !RVLocs.empty()) |
18396 | 0 | report_fatal_error("GHC functions return void only"); |
18397 | | |
18398 | 6.20k | SDValue Glue; |
18399 | 6.20k | SmallVector<SDValue, 4> RetOps(1, Chain); |
18400 | | |
18401 | | // Copy the result values into the output registers. |
18402 | 12.4k | for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { |
18403 | 6.20k | SDValue Val = OutVals[OutIdx]; |
18404 | 6.20k | CCValAssign &VA = RVLocs[i]; |
18405 | 6.20k | assert(VA.isRegLoc() && "Can only return in registers!"); |
18406 | | |
18407 | 6.20k | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
18408 | | // Handle returning f64 on RV32D with a soft float ABI. |
18409 | 0 | assert(VA.isRegLoc() && "Expected return via registers"); |
18410 | 0 | assert(VA.needsCustom()); |
18411 | 0 | SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, |
18412 | 0 | DAG.getVTList(MVT::i32, MVT::i32), Val); |
18413 | 0 | SDValue Lo = SplitF64.getValue(0); |
18414 | 0 | SDValue Hi = SplitF64.getValue(1); |
18415 | 0 | Register RegLo = VA.getLocReg(); |
18416 | 0 | Register RegHi = RVLocs[++i].getLocReg(); |
18417 | |
18418 | 0 | if (STI.isRegisterReservedByUser(RegLo) || |
18419 | 0 | STI.isRegisterReservedByUser(RegHi)) |
18420 | 0 | MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ |
18421 | 0 | MF.getFunction(), |
18422 | 0 | "Return value register required, but has been reserved."}); |
18423 | |
18424 | 0 | Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); |
18425 | 0 | Glue = Chain.getValue(1); |
18426 | 0 | RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); |
18427 | 0 | Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); |
18428 | 0 | Glue = Chain.getValue(1); |
18429 | 0 | RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); |
18430 | 6.20k | } else { |
18431 | | // Handle a 'normal' return. |
18432 | 6.20k | Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); |
18433 | 6.20k | Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); |
18434 | | |
18435 | 6.20k | if (STI.isRegisterReservedByUser(VA.getLocReg())) |
18436 | 0 | MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ |
18437 | 0 | MF.getFunction(), |
18438 | 0 | "Return value register required, but has been reserved."}); |
18439 | | |
18440 | | // Guarantee that all emitted copies are stuck together. |
18441 | 6.20k | Glue = Chain.getValue(1); |
18442 | 6.20k | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); |
18443 | 6.20k | } |
18444 | 6.20k | } |
18445 | | |
18446 | 6.20k | RetOps[0] = Chain; // Update chain. |
18447 | | |
18448 | | // Add the glue node if we have it. |
18449 | 6.20k | if (Glue.getNode()) { |
18450 | 6.20k | RetOps.push_back(Glue); |
18451 | 6.20k | } |
18452 | | |
18453 | 6.20k | if (any_of(RVLocs, |
18454 | 6.20k | [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) |
18455 | 0 | MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); |
18456 | | |
18457 | 6.20k | unsigned RetOpc = RISCVISD::RET_GLUE; |
18458 | | // Interrupt service routines use different return instructions. |
18459 | 6.20k | const Function &Func = DAG.getMachineFunction().getFunction(); |
18460 | 6.20k | if (Func.hasFnAttribute("interrupt")) { |
18461 | 0 | if (!Func.getReturnType()->isVoidTy()) |
18462 | 0 | report_fatal_error( |
18463 | 0 | "Functions with the interrupt attribute must have void return type!"); |
18464 | |
18465 | 0 | MachineFunction &MF = DAG.getMachineFunction(); |
18466 | 0 | StringRef Kind = |
18467 | 0 | MF.getFunction().getFnAttribute("interrupt").getValueAsString(); |
18468 | |
18469 | 0 | if (Kind == "supervisor") |
18470 | 0 | RetOpc = RISCVISD::SRET_GLUE; |
18471 | 0 | else |
18472 | 0 | RetOpc = RISCVISD::MRET_GLUE; |
18473 | 0 | } |
18474 | | |
18475 | 6.20k | return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); |
18476 | 6.20k | } |
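
A hedged frontend-level example of the interrupt-return selection above: handlers carrying the RISC-V "interrupt" attribute must take no arguments and return void, and the attribute's value picks SRET_GLUE for supervisor handlers and MRET_GLUE otherwise.

__attribute__((interrupt("supervisor"))) void supervisor_handler(void) {
  // lowered to return via SRET_GLUE
}

__attribute__((interrupt("machine"))) void machine_handler(void) {
  // lowered to return via MRET_GLUE
}
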
18477 | | |
18478 | | void RISCVTargetLowering::validateCCReservedRegs( |
18479 | | const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, |
18480 | 42.7k | MachineFunction &MF) const { |
18481 | 42.7k | const Function &F = MF.getFunction(); |
18482 | 42.7k | const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); |
18483 | | |
18484 | 85.5k | if (llvm::any_of(Regs, [&STI](auto Reg) { |
18485 | 85.5k | return STI.isRegisterReservedByUser(Reg.first); |
18486 | 85.5k | })) |
18487 | 0 | F.getContext().diagnose(DiagnosticInfoUnsupported{ |
18488 | 0 | F, "Argument register required, but has been reserved."}); |
18489 | 42.7k | } |
18490 | | |
18491 | | // Check if the result of the node is only used as a return value, as |
18492 | | // otherwise we can't perform a tail-call. |
18493 | 12.1k | bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { |
18494 | 12.1k | if (N->getNumValues() != 1) |
18495 | 0 | return false; |
18496 | 12.1k | if (!N->hasNUsesOfValue(1, 0)) |
18497 | 3.15k | return false; |
18498 | | |
18499 | 9.00k | SDNode *Copy = *N->use_begin(); |
18500 | | |
18501 | 9.00k | if (Copy->getOpcode() == ISD::BITCAST) { |
18502 | 0 | return isUsedByReturnOnly(Copy, Chain); |
18503 | 0 | } |
18504 | | |
18505 | | // TODO: Handle additional opcodes in order to support tail-calling libcalls |
18506 | | // with soft float ABIs. |
18507 | 9.00k | if (Copy->getOpcode() != ISD::CopyToReg) { |
18508 | 8.23k | return false; |
18509 | 8.23k | } |
18510 | | |
18511 | | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
18512 | | // isn't safe to perform a tail call. |
18513 | 770 | if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue) |
18514 | 52 | return false; |
18515 | | |
18516 | | // The copy must be used by a RISCVISD::RET_GLUE, and nothing else. |
18517 | 718 | bool HasRet = false; |
18518 | 951 | for (SDNode *Node : Copy->uses()) { |
18519 | 951 | if (Node->getOpcode() != RISCVISD::RET_GLUE) |
18520 | 485 | return false; |
18521 | 466 | HasRet = true; |
18522 | 466 | } |
18523 | 233 | if (!HasRet) |
18524 | 0 | return false; |
18525 | | |
18526 | 233 | Chain = Copy->getOperand(0); |
18527 | 233 | return true; |
18528 | 233 | } |
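
The TODO above concerns tail-calling soft-float libcalls. For instance, with a soft-float ABI the following addition lowers to a libcall (typically __adddf3) whose result is used only by the return, which is exactly the DAG shape isUsedByReturnOnly accepts:

double addDoubles(double A, double B) { return A + B; }
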
18529 | | |
18530 | 0 | bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
18531 | 0 | return CI->isTailCall(); |
18532 | 0 | } |
18533 | | |
18534 | 0 | const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { |
18535 | 0 | #define NODE_NAME_CASE(NODE) \ |
18536 | 0 | case RISCVISD::NODE: \ |
18537 | 0 | return "RISCVISD::" #NODE; |
18538 | | // clang-format off |
18539 | 0 | switch ((RISCVISD::NodeType)Opcode) { |
18540 | 0 | case RISCVISD::FIRST_NUMBER: |
18541 | 0 | break; |
18542 | 0 | NODE_NAME_CASE(RET_GLUE) |
18543 | 0 | NODE_NAME_CASE(SRET_GLUE) |
18544 | 0 | NODE_NAME_CASE(MRET_GLUE) |
18545 | 0 | NODE_NAME_CASE(CALL) |
18546 | 0 | NODE_NAME_CASE(SELECT_CC) |
18547 | 0 | NODE_NAME_CASE(BR_CC) |
18548 | 0 | NODE_NAME_CASE(BuildPairF64) |
18549 | 0 | NODE_NAME_CASE(SplitF64) |
18550 | 0 | NODE_NAME_CASE(TAIL) |
18551 | 0 | NODE_NAME_CASE(ADD_LO) |
18552 | 0 | NODE_NAME_CASE(HI) |
18553 | 0 | NODE_NAME_CASE(LLA) |
18554 | 0 | NODE_NAME_CASE(ADD_TPREL) |
18555 | 0 | NODE_NAME_CASE(MULHSU) |
18556 | 0 | NODE_NAME_CASE(SLLW) |
18557 | 0 | NODE_NAME_CASE(SRAW) |
18558 | 0 | NODE_NAME_CASE(SRLW) |
18559 | 0 | NODE_NAME_CASE(DIVW) |
18560 | 0 | NODE_NAME_CASE(DIVUW) |
18561 | 0 | NODE_NAME_CASE(REMUW) |
18562 | 0 | NODE_NAME_CASE(ROLW) |
18563 | 0 | NODE_NAME_CASE(RORW) |
18564 | 0 | NODE_NAME_CASE(CLZW) |
18565 | 0 | NODE_NAME_CASE(CTZW) |
18566 | 0 | NODE_NAME_CASE(ABSW) |
18567 | 0 | NODE_NAME_CASE(FMV_H_X) |
18568 | 0 | NODE_NAME_CASE(FMV_X_ANYEXTH) |
18569 | 0 | NODE_NAME_CASE(FMV_X_SIGNEXTH) |
18570 | 0 | NODE_NAME_CASE(FMV_W_X_RV64) |
18571 | 0 | NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) |
18572 | 0 | NODE_NAME_CASE(FCVT_X) |
18573 | 0 | NODE_NAME_CASE(FCVT_XU) |
18574 | 0 | NODE_NAME_CASE(FCVT_W_RV64) |
18575 | 0 | NODE_NAME_CASE(FCVT_WU_RV64) |
18576 | 0 | NODE_NAME_CASE(STRICT_FCVT_W_RV64) |
18577 | 0 | NODE_NAME_CASE(STRICT_FCVT_WU_RV64) |
18578 | 0 | NODE_NAME_CASE(FP_ROUND_BF16) |
18579 | 0 | NODE_NAME_CASE(FP_EXTEND_BF16) |
18580 | 0 | NODE_NAME_CASE(FROUND) |
18581 | 0 | NODE_NAME_CASE(FCLASS) |
18582 | 0 | NODE_NAME_CASE(FMAX) |
18583 | 0 | NODE_NAME_CASE(FMIN) |
18584 | 0 | NODE_NAME_CASE(READ_CYCLE_WIDE) |
18585 | 0 | NODE_NAME_CASE(BREV8) |
18586 | 0 | NODE_NAME_CASE(ORC_B) |
18587 | 0 | NODE_NAME_CASE(ZIP) |
18588 | 0 | NODE_NAME_CASE(UNZIP) |
18589 | 0 | NODE_NAME_CASE(CLMUL) |
18590 | 0 | NODE_NAME_CASE(CLMULH) |
18591 | 0 | NODE_NAME_CASE(CLMULR) |
18592 | 0 | NODE_NAME_CASE(SHA256SIG0) |
18593 | 0 | NODE_NAME_CASE(SHA256SIG1) |
18594 | 0 | NODE_NAME_CASE(SHA256SUM0) |
18595 | 0 | NODE_NAME_CASE(SHA256SUM1) |
18596 | 0 | NODE_NAME_CASE(SM4KS) |
18597 | 0 | NODE_NAME_CASE(SM4ED) |
18598 | 0 | NODE_NAME_CASE(SM3P0) |
18599 | 0 | NODE_NAME_CASE(SM3P1) |
18600 | 0 | NODE_NAME_CASE(TH_LWD) |
18601 | 0 | NODE_NAME_CASE(TH_LWUD) |
18602 | 0 | NODE_NAME_CASE(TH_LDD) |
18603 | 0 | NODE_NAME_CASE(TH_SWD) |
18604 | 0 | NODE_NAME_CASE(TH_SDD) |
18605 | 0 | NODE_NAME_CASE(VMV_V_V_VL) |
18606 | 0 | NODE_NAME_CASE(VMV_V_X_VL) |
18607 | 0 | NODE_NAME_CASE(VFMV_V_F_VL) |
18608 | 0 | NODE_NAME_CASE(VMV_X_S) |
18609 | 0 | NODE_NAME_CASE(VMV_S_X_VL) |
18610 | 0 | NODE_NAME_CASE(VFMV_S_F_VL) |
18611 | 0 | NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) |
18612 | 0 | NODE_NAME_CASE(READ_VLENB) |
18613 | 0 | NODE_NAME_CASE(TRUNCATE_VECTOR_VL) |
18614 | 0 | NODE_NAME_CASE(VSLIDEUP_VL) |
18615 | 0 | NODE_NAME_CASE(VSLIDE1UP_VL) |
18616 | 0 | NODE_NAME_CASE(VSLIDEDOWN_VL) |
18617 | 0 | NODE_NAME_CASE(VSLIDE1DOWN_VL) |
18618 | 0 | NODE_NAME_CASE(VFSLIDE1UP_VL) |
18619 | 0 | NODE_NAME_CASE(VFSLIDE1DOWN_VL) |
18620 | 0 | NODE_NAME_CASE(VID_VL) |
18621 | 0 | NODE_NAME_CASE(VFNCVT_ROD_VL) |
18622 | 0 | NODE_NAME_CASE(VECREDUCE_ADD_VL) |
18623 | 0 | NODE_NAME_CASE(VECREDUCE_UMAX_VL) |
18624 | 0 | NODE_NAME_CASE(VECREDUCE_SMAX_VL) |
18625 | 0 | NODE_NAME_CASE(VECREDUCE_UMIN_VL) |
18626 | 0 | NODE_NAME_CASE(VECREDUCE_SMIN_VL) |
18627 | 0 | NODE_NAME_CASE(VECREDUCE_AND_VL) |
18628 | 0 | NODE_NAME_CASE(VECREDUCE_OR_VL) |
18629 | 0 | NODE_NAME_CASE(VECREDUCE_XOR_VL) |
18630 | 0 | NODE_NAME_CASE(VECREDUCE_FADD_VL) |
18631 | 0 | NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) |
18632 | 0 | NODE_NAME_CASE(VECREDUCE_FMIN_VL) |
18633 | 0 | NODE_NAME_CASE(VECREDUCE_FMAX_VL) |
18634 | 0 | NODE_NAME_CASE(ADD_VL) |
18635 | 0 | NODE_NAME_CASE(AND_VL) |
18636 | 0 | NODE_NAME_CASE(MUL_VL) |
18637 | 0 | NODE_NAME_CASE(OR_VL) |
18638 | 0 | NODE_NAME_CASE(SDIV_VL) |
18639 | 0 | NODE_NAME_CASE(SHL_VL) |
18640 | 0 | NODE_NAME_CASE(SREM_VL) |
18641 | 0 | NODE_NAME_CASE(SRA_VL) |
18642 | 0 | NODE_NAME_CASE(SRL_VL) |
18643 | 0 | NODE_NAME_CASE(ROTL_VL) |
18644 | 0 | NODE_NAME_CASE(ROTR_VL) |
18645 | 0 | NODE_NAME_CASE(SUB_VL) |
18646 | 0 | NODE_NAME_CASE(UDIV_VL) |
18647 | 0 | NODE_NAME_CASE(UREM_VL) |
18648 | 0 | NODE_NAME_CASE(XOR_VL) |
18649 | 0 | NODE_NAME_CASE(AVGFLOORU_VL) |
18650 | 0 | NODE_NAME_CASE(AVGCEILU_VL) |
18651 | 0 | NODE_NAME_CASE(SADDSAT_VL) |
18652 | 0 | NODE_NAME_CASE(UADDSAT_VL) |
18653 | 0 | NODE_NAME_CASE(SSUBSAT_VL) |
18654 | 0 | NODE_NAME_CASE(USUBSAT_VL) |
18655 | 0 | NODE_NAME_CASE(FADD_VL) |
18656 | 0 | NODE_NAME_CASE(FSUB_VL) |
18657 | 0 | NODE_NAME_CASE(FMUL_VL) |
18658 | 0 | NODE_NAME_CASE(FDIV_VL) |
18659 | 0 | NODE_NAME_CASE(FNEG_VL) |
18660 | 0 | NODE_NAME_CASE(FABS_VL) |
18661 | 0 | NODE_NAME_CASE(FSQRT_VL) |
18662 | 0 | NODE_NAME_CASE(FCLASS_VL) |
18663 | 0 | NODE_NAME_CASE(VFMADD_VL) |
18664 | 0 | NODE_NAME_CASE(VFNMADD_VL) |
18665 | 0 | NODE_NAME_CASE(VFMSUB_VL) |
18666 | 0 | NODE_NAME_CASE(VFNMSUB_VL) |
18667 | 0 | NODE_NAME_CASE(VFWMADD_VL) |
18668 | 0 | NODE_NAME_CASE(VFWNMADD_VL) |
18669 | 0 | NODE_NAME_CASE(VFWMSUB_VL) |
18670 | 0 | NODE_NAME_CASE(VFWNMSUB_VL) |
18671 | 0 | NODE_NAME_CASE(FCOPYSIGN_VL) |
18672 | 0 | NODE_NAME_CASE(SMIN_VL) |
18673 | 0 | NODE_NAME_CASE(SMAX_VL) |
18674 | 0 | NODE_NAME_CASE(UMIN_VL) |
18675 | 0 | NODE_NAME_CASE(UMAX_VL) |
18676 | 0 | NODE_NAME_CASE(BITREVERSE_VL) |
18677 | 0 | NODE_NAME_CASE(BSWAP_VL) |
18678 | 0 | NODE_NAME_CASE(CTLZ_VL) |
18679 | 0 | NODE_NAME_CASE(CTTZ_VL) |
18680 | 0 | NODE_NAME_CASE(CTPOP_VL) |
18681 | 0 | NODE_NAME_CASE(VFMIN_VL) |
18682 | 0 | NODE_NAME_CASE(VFMAX_VL) |
18683 | 0 | NODE_NAME_CASE(MULHS_VL) |
18684 | 0 | NODE_NAME_CASE(MULHU_VL) |
18685 | 0 | NODE_NAME_CASE(VFCVT_RTZ_X_F_VL) |
18686 | 0 | NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL) |
18687 | 0 | NODE_NAME_CASE(VFCVT_RM_X_F_VL) |
18688 | 0 | NODE_NAME_CASE(VFCVT_RM_XU_F_VL) |
18689 | 0 | NODE_NAME_CASE(VFCVT_X_F_VL) |
18690 | 0 | NODE_NAME_CASE(VFCVT_XU_F_VL) |
18691 | 0 | NODE_NAME_CASE(VFROUND_NOEXCEPT_VL) |
18692 | 0 | NODE_NAME_CASE(SINT_TO_FP_VL) |
18693 | 0 | NODE_NAME_CASE(UINT_TO_FP_VL) |
18694 | 0 | NODE_NAME_CASE(VFCVT_RM_F_XU_VL) |
18695 | 0 | NODE_NAME_CASE(VFCVT_RM_F_X_VL) |
18696 | 0 | NODE_NAME_CASE(FP_EXTEND_VL) |
18697 | 0 | NODE_NAME_CASE(FP_ROUND_VL) |
18698 | 0 | NODE_NAME_CASE(STRICT_FADD_VL) |
18699 | 0 | NODE_NAME_CASE(STRICT_FSUB_VL) |
18700 | 0 | NODE_NAME_CASE(STRICT_FMUL_VL) |
18701 | 0 | NODE_NAME_CASE(STRICT_FDIV_VL) |
18702 | 0 | NODE_NAME_CASE(STRICT_FSQRT_VL) |
18703 | 0 | NODE_NAME_CASE(STRICT_VFMADD_VL) |
18704 | 0 | NODE_NAME_CASE(STRICT_VFNMADD_VL) |
18705 | 0 | NODE_NAME_CASE(STRICT_VFMSUB_VL) |
18706 | 0 | NODE_NAME_CASE(STRICT_VFNMSUB_VL) |
18707 | 0 | NODE_NAME_CASE(STRICT_FP_ROUND_VL) |
18708 | 0 | NODE_NAME_CASE(STRICT_FP_EXTEND_VL) |
18709 | 0 | NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL) |
18710 | 0 | NODE_NAME_CASE(STRICT_SINT_TO_FP_VL) |
18711 | 0 | NODE_NAME_CASE(STRICT_UINT_TO_FP_VL) |
18712 | 0 | NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL) |
18713 | 0 | NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL) |
18714 | 0 | NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL) |
18715 | 0 | NODE_NAME_CASE(STRICT_FSETCC_VL) |
18716 | 0 | NODE_NAME_CASE(STRICT_FSETCCS_VL) |
18717 | 0 | NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL) |
18718 | 0 | NODE_NAME_CASE(VWMUL_VL) |
18719 | 0 | NODE_NAME_CASE(VWMULU_VL) |
18720 | 0 | NODE_NAME_CASE(VWMULSU_VL) |
18721 | 0 | NODE_NAME_CASE(VWADD_VL) |
18722 | 0 | NODE_NAME_CASE(VWADDU_VL) |
18723 | 0 | NODE_NAME_CASE(VWSUB_VL) |
18724 | 0 | NODE_NAME_CASE(VWSUBU_VL) |
18725 | 0 | NODE_NAME_CASE(VWADD_W_VL) |
18726 | 0 | NODE_NAME_CASE(VWADDU_W_VL) |
18727 | 0 | NODE_NAME_CASE(VWSUB_W_VL) |
18728 | 0 | NODE_NAME_CASE(VWSUBU_W_VL) |
18729 | 0 | NODE_NAME_CASE(VWSLL_VL) |
18730 | 0 | NODE_NAME_CASE(VFWMUL_VL) |
18731 | 0 | NODE_NAME_CASE(VFWADD_VL) |
18732 | 0 | NODE_NAME_CASE(VFWSUB_VL) |
18733 | 0 | NODE_NAME_CASE(VFWADD_W_VL) |
18734 | 0 | NODE_NAME_CASE(VFWSUB_W_VL) |
18735 | 0 | NODE_NAME_CASE(VWMACC_VL) |
18736 | 0 | NODE_NAME_CASE(VWMACCU_VL) |
18737 | 0 | NODE_NAME_CASE(VWMACCSU_VL) |
18738 | 0 | NODE_NAME_CASE(VNSRL_VL) |
18739 | 0 | NODE_NAME_CASE(SETCC_VL) |
18740 | 0 | NODE_NAME_CASE(VMERGE_VL) |
18741 | 0 | NODE_NAME_CASE(VMAND_VL) |
18742 | 0 | NODE_NAME_CASE(VMOR_VL) |
18743 | 0 | NODE_NAME_CASE(VMXOR_VL) |
18744 | 0 | NODE_NAME_CASE(VMCLR_VL) |
18745 | 0 | NODE_NAME_CASE(VMSET_VL) |
18746 | 0 | NODE_NAME_CASE(VRGATHER_VX_VL) |
18747 | 0 | NODE_NAME_CASE(VRGATHER_VV_VL) |
18748 | 0 | NODE_NAME_CASE(VRGATHEREI16_VV_VL) |
18749 | 0 | NODE_NAME_CASE(VSEXT_VL) |
18750 | 0 | NODE_NAME_CASE(VZEXT_VL) |
18751 | 0 | NODE_NAME_CASE(VCPOP_VL) |
18752 | 0 | NODE_NAME_CASE(VFIRST_VL) |
18753 | 0 | NODE_NAME_CASE(READ_CSR) |
18754 | 0 | NODE_NAME_CASE(WRITE_CSR) |
18755 | 0 | NODE_NAME_CASE(SWAP_CSR) |
18756 | 0 | NODE_NAME_CASE(CZERO_EQZ) |
18757 | 0 | NODE_NAME_CASE(CZERO_NEZ) |
18758 | 0 | } |
18759 | | // clang-format on |
18760 | 0 | return nullptr; |
18761 | 0 | #undef NODE_NAME_CASE |
18762 | 0 | } |
18763 | | |
18764 | | /// getConstraintType - Given a constraint letter, return the type of |
18765 | | /// constraint it is for this target. |
18766 | | RISCVTargetLowering::ConstraintType |
18767 | 0 | RISCVTargetLowering::getConstraintType(StringRef Constraint) const { |
18768 | 0 | if (Constraint.size() == 1) { |
18769 | 0 | switch (Constraint[0]) { |
18770 | 0 | default: |
18771 | 0 | break; |
18772 | 0 | case 'f': |
18773 | 0 | return C_RegisterClass; |
18774 | 0 | case 'I': |
18775 | 0 | case 'J': |
18776 | 0 | case 'K': |
18777 | 0 | return C_Immediate; |
18778 | 0 | case 'A': |
18779 | 0 | return C_Memory; |
18780 | 0 | case 'S': // A symbolic address |
18781 | 0 | return C_Other; |
18782 | 0 | } |
18783 | 0 | } else { |
18784 | 0 | if (Constraint == "vr" || Constraint == "vm") |
18785 | 0 | return C_RegisterClass; |
18786 | 0 | } |
18787 | 0 | return TargetLowering::getConstraintType(Constraint); |
18788 | 0 | } |
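
As a hedged illustration of the constraint letters classified above, here is how they appear in GCC-style inline assembly: 'f' selects an FP register, 'r' a GPR, and 'I' a 12-bit signed immediate.

static double fmulExample(double X, double Y) {
  double R;
  asm("fmul.d %0, %1, %2" : "=f"(R) : "f"(X), "f"(Y)); // 'f': FP register class
  return R;
}

static int addiExample(int X) {
  int R;
  asm("addi %0, %1, %2" : "=r"(R) : "r"(X), "I"(42)); // 'I': 12-bit immediate
  return R;
}
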
18789 | | |
18790 | | std::pair<unsigned, const TargetRegisterClass *> |
18791 | | RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
18792 | | StringRef Constraint, |
18793 | 0 | MVT VT) const { |
18794 | | // First, see if this is a constraint that directly corresponds to a RISC-V |
18795 | | // register class. |
18796 | 0 | if (Constraint.size() == 1) { |
18797 | 0 | switch (Constraint[0]) { |
18798 | 0 | case 'r': |
18799 | | // TODO: Support fixed vectors up to XLen for P extension? |
18800 | 0 | if (VT.isVector()) |
18801 | 0 | break; |
18802 | 0 | if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin()) |
18803 | 0 | return std::make_pair(0U, &RISCV::GPRF16RegClass); |
18804 | 0 | if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) |
18805 | 0 | return std::make_pair(0U, &RISCV::GPRF32RegClass); |
18806 | 0 | if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) |
18807 | 0 | return std::make_pair(0U, &RISCV::GPRPairRegClass); |
18808 | 0 | return std::make_pair(0U, &RISCV::GPRNoX0RegClass); |
18809 | 0 | case 'f': |
18810 | 0 | if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) |
18811 | 0 | return std::make_pair(0U, &RISCV::FPR16RegClass); |
18812 | 0 | if (Subtarget.hasStdExtF() && VT == MVT::f32) |
18813 | 0 | return std::make_pair(0U, &RISCV::FPR32RegClass); |
18814 | 0 | if (Subtarget.hasStdExtD() && VT == MVT::f64) |
18815 | 0 | return std::make_pair(0U, &RISCV::FPR64RegClass); |
18816 | 0 | break; |
18817 | 0 | default: |
18818 | 0 | break; |
18819 | 0 | } |
18820 | 0 | } else if (Constraint == "vr") { |
18821 | 0 | for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, |
18822 | 0 | &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { |
18823 | 0 | if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) |
18824 | 0 | return std::make_pair(0U, RC); |
18825 | 0 | } |
18826 | 0 | } else if (Constraint == "vm") { |
18827 | 0 | if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) |
18828 | 0 | return std::make_pair(0U, &RISCV::VMV0RegClass); |
18829 | 0 | } |
18830 | | |
18831 | | // Clang will correctly decode the usage of register name aliases into their |
18832 | | // official names. However, other frontends like `rustc` do not. This allows |
18833 | | // users of these frontends to use the ABI names for registers in LLVM-style |
18834 | | // register constraints. |
18835 | 0 | unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) |
18836 | 0 | .Case("{zero}", RISCV::X0) |
18837 | 0 | .Case("{ra}", RISCV::X1) |
18838 | 0 | .Case("{sp}", RISCV::X2) |
18839 | 0 | .Case("{gp}", RISCV::X3) |
18840 | 0 | .Case("{tp}", RISCV::X4) |
18841 | 0 | .Case("{t0}", RISCV::X5) |
18842 | 0 | .Case("{t1}", RISCV::X6) |
18843 | 0 | .Case("{t2}", RISCV::X7) |
18844 | 0 | .Cases("{s0}", "{fp}", RISCV::X8) |
18845 | 0 | .Case("{s1}", RISCV::X9) |
18846 | 0 | .Case("{a0}", RISCV::X10) |
18847 | 0 | .Case("{a1}", RISCV::X11) |
18848 | 0 | .Case("{a2}", RISCV::X12) |
18849 | 0 | .Case("{a3}", RISCV::X13) |
18850 | 0 | .Case("{a4}", RISCV::X14) |
18851 | 0 | .Case("{a5}", RISCV::X15) |
18852 | 0 | .Case("{a6}", RISCV::X16) |
18853 | 0 | .Case("{a7}", RISCV::X17) |
18854 | 0 | .Case("{s2}", RISCV::X18) |
18855 | 0 | .Case("{s3}", RISCV::X19) |
18856 | 0 | .Case("{s4}", RISCV::X20) |
18857 | 0 | .Case("{s5}", RISCV::X21) |
18858 | 0 | .Case("{s6}", RISCV::X22) |
18859 | 0 | .Case("{s7}", RISCV::X23) |
18860 | 0 | .Case("{s8}", RISCV::X24) |
18861 | 0 | .Case("{s9}", RISCV::X25) |
18862 | 0 | .Case("{s10}", RISCV::X26) |
18863 | 0 | .Case("{s11}", RISCV::X27) |
18864 | 0 | .Case("{t3}", RISCV::X28) |
18865 | 0 | .Case("{t4}", RISCV::X29) |
18866 | 0 | .Case("{t5}", RISCV::X30) |
18867 | 0 | .Case("{t6}", RISCV::X31) |
18868 | 0 | .Default(RISCV::NoRegister); |
18869 | 0 | if (XRegFromAlias != RISCV::NoRegister) |
18870 | 0 | return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); |
18871 | | |
18872 | | // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the |
18873 | | // TableGen record rather than the AsmName to choose registers for InlineAsm |
18874 | | // constraints, and because we want to match those names to the widest floating-point
18875 | | // register type available, manually select floating point registers here. |
18876 | | // |
18877 | | // The second case is the ABI name of the register, so that frontends can also |
18878 | | // use the ABI names in register constraint lists. |
18879 | 0 | if (Subtarget.hasStdExtF()) { |
18880 | 0 | unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) |
18881 | 0 | .Cases("{f0}", "{ft0}", RISCV::F0_F) |
18882 | 0 | .Cases("{f1}", "{ft1}", RISCV::F1_F) |
18883 | 0 | .Cases("{f2}", "{ft2}", RISCV::F2_F) |
18884 | 0 | .Cases("{f3}", "{ft3}", RISCV::F3_F) |
18885 | 0 | .Cases("{f4}", "{ft4}", RISCV::F4_F) |
18886 | 0 | .Cases("{f5}", "{ft5}", RISCV::F5_F) |
18887 | 0 | .Cases("{f6}", "{ft6}", RISCV::F6_F) |
18888 | 0 | .Cases("{f7}", "{ft7}", RISCV::F7_F) |
18889 | 0 | .Cases("{f8}", "{fs0}", RISCV::F8_F) |
18890 | 0 | .Cases("{f9}", "{fs1}", RISCV::F9_F) |
18891 | 0 | .Cases("{f10}", "{fa0}", RISCV::F10_F) |
18892 | 0 | .Cases("{f11}", "{fa1}", RISCV::F11_F) |
18893 | 0 | .Cases("{f12}", "{fa2}", RISCV::F12_F) |
18894 | 0 | .Cases("{f13}", "{fa3}", RISCV::F13_F) |
18895 | 0 | .Cases("{f14}", "{fa4}", RISCV::F14_F) |
18896 | 0 | .Cases("{f15}", "{fa5}", RISCV::F15_F) |
18897 | 0 | .Cases("{f16}", "{fa6}", RISCV::F16_F) |
18898 | 0 | .Cases("{f17}", "{fa7}", RISCV::F17_F) |
18899 | 0 | .Cases("{f18}", "{fs2}", RISCV::F18_F) |
18900 | 0 | .Cases("{f19}", "{fs3}", RISCV::F19_F) |
18901 | 0 | .Cases("{f20}", "{fs4}", RISCV::F20_F) |
18902 | 0 | .Cases("{f21}", "{fs5}", RISCV::F21_F) |
18903 | 0 | .Cases("{f22}", "{fs6}", RISCV::F22_F) |
18904 | 0 | .Cases("{f23}", "{fs7}", RISCV::F23_F) |
18905 | 0 | .Cases("{f24}", "{fs8}", RISCV::F24_F) |
18906 | 0 | .Cases("{f25}", "{fs9}", RISCV::F25_F) |
18907 | 0 | .Cases("{f26}", "{fs10}", RISCV::F26_F) |
18908 | 0 | .Cases("{f27}", "{fs11}", RISCV::F27_F) |
18909 | 0 | .Cases("{f28}", "{ft8}", RISCV::F28_F) |
18910 | 0 | .Cases("{f29}", "{ft9}", RISCV::F29_F) |
18911 | 0 | .Cases("{f30}", "{ft10}", RISCV::F30_F) |
18912 | 0 | .Cases("{f31}", "{ft11}", RISCV::F31_F) |
18913 | 0 | .Default(RISCV::NoRegister); |
18914 | 0 | if (FReg != RISCV::NoRegister) { |
18915 | 0 | assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); |
18916 | 0 | if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) { |
18917 | 0 | unsigned RegNo = FReg - RISCV::F0_F; |
18918 | 0 | unsigned DReg = RISCV::F0_D + RegNo; |
18919 | 0 | return std::make_pair(DReg, &RISCV::FPR64RegClass); |
18920 | 0 | } |
18921 | 0 | if (VT == MVT::f32 || VT == MVT::Other) |
18922 | 0 | return std::make_pair(FReg, &RISCV::FPR32RegClass); |
18923 | 0 | if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) { |
18924 | 0 | unsigned RegNo = FReg - RISCV::F0_F; |
18925 | 0 | unsigned HReg = RISCV::F0_H + RegNo; |
18926 | 0 | return std::make_pair(HReg, &RISCV::FPR16RegClass); |
18927 | 0 | } |
18928 | 0 | } |
18929 | 0 | } |
18930 | | |
18931 | 0 | if (Subtarget.hasVInstructions()) { |
18932 | 0 | Register VReg = StringSwitch<Register>(Constraint.lower()) |
18933 | 0 | .Case("{v0}", RISCV::V0) |
18934 | 0 | .Case("{v1}", RISCV::V1) |
18935 | 0 | .Case("{v2}", RISCV::V2) |
18936 | 0 | .Case("{v3}", RISCV::V3) |
18937 | 0 | .Case("{v4}", RISCV::V4) |
18938 | 0 | .Case("{v5}", RISCV::V5) |
18939 | 0 | .Case("{v6}", RISCV::V6) |
18940 | 0 | .Case("{v7}", RISCV::V7) |
18941 | 0 | .Case("{v8}", RISCV::V8) |
18942 | 0 | .Case("{v9}", RISCV::V9) |
18943 | 0 | .Case("{v10}", RISCV::V10) |
18944 | 0 | .Case("{v11}", RISCV::V11) |
18945 | 0 | .Case("{v12}", RISCV::V12) |
18946 | 0 | .Case("{v13}", RISCV::V13) |
18947 | 0 | .Case("{v14}", RISCV::V14) |
18948 | 0 | .Case("{v15}", RISCV::V15) |
18949 | 0 | .Case("{v16}", RISCV::V16) |
18950 | 0 | .Case("{v17}", RISCV::V17) |
18951 | 0 | .Case("{v18}", RISCV::V18) |
18952 | 0 | .Case("{v19}", RISCV::V19) |
18953 | 0 | .Case("{v20}", RISCV::V20) |
18954 | 0 | .Case("{v21}", RISCV::V21) |
18955 | 0 | .Case("{v22}", RISCV::V22) |
18956 | 0 | .Case("{v23}", RISCV::V23) |
18957 | 0 | .Case("{v24}", RISCV::V24) |
18958 | 0 | .Case("{v25}", RISCV::V25) |
18959 | 0 | .Case("{v26}", RISCV::V26) |
18960 | 0 | .Case("{v27}", RISCV::V27) |
18961 | 0 | .Case("{v28}", RISCV::V28) |
18962 | 0 | .Case("{v29}", RISCV::V29) |
18963 | 0 | .Case("{v30}", RISCV::V30) |
18964 | 0 | .Case("{v31}", RISCV::V31) |
18965 | 0 | .Default(RISCV::NoRegister); |
18966 | 0 | if (VReg != RISCV::NoRegister) { |
18967 | 0 | if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) |
18968 | 0 | return std::make_pair(VReg, &RISCV::VMRegClass); |
18969 | 0 | if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) |
18970 | 0 | return std::make_pair(VReg, &RISCV::VRRegClass); |
18971 | 0 | for (const auto *RC : |
18972 | 0 | {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { |
18973 | 0 | if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { |
18974 | 0 | VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); |
18975 | 0 | return std::make_pair(VReg, RC); |
18976 | 0 | } |
18977 | 0 | } |
18978 | 0 | } |
18979 | 0 | } |
18980 | | |
18981 | 0 | std::pair<Register, const TargetRegisterClass *> Res = |
18982 | 0 | TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
18983 | | |
18984 | | // If we picked one of the Zfinx register classes, remap it to the GPR class. |
18985 | | // FIXME: When Zfinx is supported in CodeGen this will need to take the |
18986 | | // Subtarget into account. |
18987 | 0 | if (Res.second == &RISCV::GPRF16RegClass || |
18988 | 0 | Res.second == &RISCV::GPRF32RegClass || |
18989 | 0 | Res.second == &RISCV::GPRPairRegClass) |
18990 | 0 | return std::make_pair(Res.first, &RISCV::GPRRegClass); |
18991 | | |
18992 | 0 | return Res; |
18993 | 0 | } |
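// For illustration, a frontend relying on the ABI-name handling above might
// emit, on RV64, IR along the lines of
//   %res = call i64 asm sideeffect "mv $0, $1", "={a0},{a1}"(i64 %x)
// where the "{a0}"/"{a1}" constraints resolve to X10/X11, exactly as the
// TableGen record names "{x10}"/"{x11}" would.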
18994 | | |
18995 | | InlineAsm::ConstraintCode |
18996 | 0 | RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
18997 | | // Currently only support length 1 constraints. |
18998 | 0 | if (ConstraintCode.size() == 1) { |
18999 | 0 | switch (ConstraintCode[0]) { |
19000 | 0 | case 'A': |
19001 | 0 | return InlineAsm::ConstraintCode::A; |
19002 | 0 | default: |
19003 | 0 | break; |
19004 | 0 | } |
19005 | 0 | } |
19006 | | |
19007 | 0 | return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); |
19008 | 0 | } |
19009 | | |
19010 | | void RISCVTargetLowering::LowerAsmOperandForConstraint( |
19011 | | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
19012 | 0 | SelectionDAG &DAG) const { |
19013 | | // Currently only support length 1 constraints. |
19014 | 0 | if (Constraint.size() == 1) { |
19015 | 0 | switch (Constraint[0]) { |
19016 | 0 | case 'I': |
19017 | | // Validate & create a 12-bit signed immediate operand. |
19018 | 0 | if (auto *C = dyn_cast<ConstantSDNode>(Op)) { |
19019 | 0 | uint64_t CVal = C->getSExtValue(); |
19020 | 0 | if (isInt<12>(CVal)) |
19021 | 0 | Ops.push_back( |
19022 | 0 | DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); |
19023 | 0 | } |
19024 | 0 | return; |
19025 | 0 | case 'J': |
19026 | | // Validate & create an integer zero operand. |
19027 | 0 | if (isNullConstant(Op)) |
19028 | 0 | Ops.push_back( |
19029 | 0 | DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); |
19030 | 0 | return; |
19031 | 0 | case 'K': |
19032 | | // Validate & create a 5-bit unsigned immediate operand. |
19033 | 0 | if (auto *C = dyn_cast<ConstantSDNode>(Op)) { |
19034 | 0 | uint64_t CVal = C->getZExtValue(); |
19035 | 0 | if (isUInt<5>(CVal)) |
19036 | 0 | Ops.push_back( |
19037 | 0 | DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); |
19038 | 0 | } |
19039 | 0 | return; |
19040 | 0 | case 'S': |
19041 | 0 | if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) { |
19042 | 0 | Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), |
19043 | 0 | GA->getValueType(0))); |
19044 | 0 | } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) { |
19045 | 0 | Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(), |
19046 | 0 | BA->getValueType(0))); |
19047 | 0 | } |
19048 | 0 | return; |
19049 | 0 | default: |
19050 | 0 | break; |
19051 | 0 | } |
19052 | 0 | } |
19053 | 0 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
19054 | 0 | } |
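// For illustration, the immediate constraints validated above are typically
// reached from C via GCC-style inline asm such as
//   asm volatile("addi %0, %1, %2" : "=r"(res) : "r"(src), "I"(-2048));
// where -2048 still fits the signed 12-bit 'I' range; 'J' accepts only the
// constant zero and 'K' a 5-bit unsigned immediate. Out-of-range constants
// are simply not added to Ops.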
19055 | | |
19056 | | Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, |
19057 | | Instruction *Inst, |
19058 | 0 | AtomicOrdering Ord) const { |
19059 | 0 | if (Subtarget.hasStdExtZtso()) { |
19060 | 0 | if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
19061 | 0 | return Builder.CreateFence(Ord); |
19062 | 0 | return nullptr; |
19063 | 0 | } |
19064 | | |
19065 | 0 | if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
19066 | 0 | return Builder.CreateFence(Ord); |
19067 | 0 | if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) |
19068 | 0 | return Builder.CreateFence(AtomicOrdering::Release); |
19069 | 0 | return nullptr; |
19070 | 0 | } |
19071 | | |
19072 | | Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, |
19073 | | Instruction *Inst, |
19074 | 0 | AtomicOrdering Ord) const { |
19075 | 0 | if (Subtarget.hasStdExtZtso()) { |
19076 | 0 | if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) |
19077 | 0 | return Builder.CreateFence(Ord); |
19078 | 0 | return nullptr; |
19079 | 0 | } |
19080 | | |
19081 | 0 | if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) |
19082 | 0 | return Builder.CreateFence(AtomicOrdering::Acquire); |
19083 | 0 | if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) && |
19084 | 0 | Ord == AtomicOrdering::SequentiallyConsistent) |
19085 | 0 | return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); |
19086 | 0 | return nullptr; |
19087 | 0 | } |
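// Taken together, and assuming the usual fence lowering (seq_cst -> "fence
// rw,rw", release -> "fence rw,w", acquire -> "fence r,rw"), this yields
// roughly: under WMO a seq_cst load is bracketed as "fence rw,rw; load;
// fence r,rw", an acquire load gets only the trailing "fence r,rw", and a
// release or seq_cst store gets a leading "fence rw,w" (plus a trailing
// "fence rw,rw" for seq_cst when enableSeqCstTrailingFence() is set). Under
// Ztso only the seq_cst load and store keep a fence.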
19088 | | |
19089 | | TargetLowering::AtomicExpansionKind |
19090 | 0 | RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
19091 | | // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating |
19092 | | // point operations can't be used in an lr/sc sequence without breaking the |
19093 | | // forward-progress guarantee. |
19094 | 0 | if (AI->isFloatingPointOperation() || |
19095 | 0 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
19096 | 0 | AI->getOperation() == AtomicRMWInst::UDecWrap) |
19097 | 0 | return AtomicExpansionKind::CmpXChg; |
19098 | | |
19099 | | // Don't expand forced atomics, we want to have __sync libcalls instead. |
19100 | 0 | if (Subtarget.hasForcedAtomics()) |
19101 | 0 | return AtomicExpansionKind::None; |
19102 | | |
19103 | 0 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
19104 | 0 | if (Size == 8 || Size == 16) |
19105 | 0 | return AtomicExpansionKind::MaskedIntrinsic; |
19106 | 0 | return AtomicExpansionKind::None; |
19107 | 0 | } |
19108 | | |
19109 | | static Intrinsic::ID |
19110 | 0 | getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { |
19111 | 0 | if (XLen == 32) { |
19112 | 0 | switch (BinOp) { |
19113 | 0 | default: |
19114 | 0 | llvm_unreachable("Unexpected AtomicRMW BinOp"); |
19115 | 0 | case AtomicRMWInst::Xchg: |
19116 | 0 | return Intrinsic::riscv_masked_atomicrmw_xchg_i32; |
19117 | 0 | case AtomicRMWInst::Add: |
19118 | 0 | return Intrinsic::riscv_masked_atomicrmw_add_i32; |
19119 | 0 | case AtomicRMWInst::Sub: |
19120 | 0 | return Intrinsic::riscv_masked_atomicrmw_sub_i32; |
19121 | 0 | case AtomicRMWInst::Nand: |
19122 | 0 | return Intrinsic::riscv_masked_atomicrmw_nand_i32; |
19123 | 0 | case AtomicRMWInst::Max: |
19124 | 0 | return Intrinsic::riscv_masked_atomicrmw_max_i32; |
19125 | 0 | case AtomicRMWInst::Min: |
19126 | 0 | return Intrinsic::riscv_masked_atomicrmw_min_i32; |
19127 | 0 | case AtomicRMWInst::UMax: |
19128 | 0 | return Intrinsic::riscv_masked_atomicrmw_umax_i32; |
19129 | 0 | case AtomicRMWInst::UMin: |
19130 | 0 | return Intrinsic::riscv_masked_atomicrmw_umin_i32; |
19131 | 0 | } |
19132 | 0 | } |
19133 | | |
19134 | 0 | if (XLen == 64) { |
19135 | 0 | switch (BinOp) { |
19136 | 0 | default: |
19137 | 0 | llvm_unreachable("Unexpected AtomicRMW BinOp"); |
19138 | 0 | case AtomicRMWInst::Xchg: |
19139 | 0 | return Intrinsic::riscv_masked_atomicrmw_xchg_i64; |
19140 | 0 | case AtomicRMWInst::Add: |
19141 | 0 | return Intrinsic::riscv_masked_atomicrmw_add_i64; |
19142 | 0 | case AtomicRMWInst::Sub: |
19143 | 0 | return Intrinsic::riscv_masked_atomicrmw_sub_i64; |
19144 | 0 | case AtomicRMWInst::Nand: |
19145 | 0 | return Intrinsic::riscv_masked_atomicrmw_nand_i64; |
19146 | 0 | case AtomicRMWInst::Max: |
19147 | 0 | return Intrinsic::riscv_masked_atomicrmw_max_i64; |
19148 | 0 | case AtomicRMWInst::Min: |
19149 | 0 | return Intrinsic::riscv_masked_atomicrmw_min_i64; |
19150 | 0 | case AtomicRMWInst::UMax: |
19151 | 0 | return Intrinsic::riscv_masked_atomicrmw_umax_i64; |
19152 | 0 | case AtomicRMWInst::UMin: |
19153 | 0 | return Intrinsic::riscv_masked_atomicrmw_umin_i64; |
19154 | 0 | } |
19155 | 0 | } |
19156 | | |
19157 | 0 | llvm_unreachable("Unexpected XLen\n"); |
19158 | 0 | } |
19159 | | |
19160 | | Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( |
19161 | | IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, |
19162 | 0 | Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { |
19163 | | // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace |
19164 | | // the atomic instruction with an AtomicRMWInst::And/Or with appropriate |
19165 | | // mask, as this produces better code than the LR/SC loop emitted by |
19166 | | // int_riscv_masked_atomicrmw_xchg. |
19167 | 0 | if (AI->getOperation() == AtomicRMWInst::Xchg && |
19168 | 0 | isa<ConstantInt>(AI->getValOperand())) { |
19169 | 0 | ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand()); |
19170 | 0 | if (CVal->isZero()) |
19171 | 0 | return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr, |
19172 | 0 | Builder.CreateNot(Mask, "Inv_Mask"), |
19173 | 0 | AI->getAlign(), Ord); |
19174 | 0 | if (CVal->isMinusOne()) |
19175 | 0 | return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask, |
19176 | 0 | AI->getAlign(), Ord); |
19177 | 0 | } |
19178 | | |
19179 | 0 | unsigned XLen = Subtarget.getXLen(); |
19180 | 0 | Value *Ordering = |
19181 | 0 | Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); |
19182 | 0 | Type *Tys[] = {AlignedAddr->getType()}; |
19183 | 0 | Function *LrwOpScwLoop = Intrinsic::getDeclaration( |
19184 | 0 | AI->getModule(), |
19185 | 0 | getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); |
19186 | 0 |
19187 | 0 | if (XLen == 64) { |
19188 | 0 | Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); |
19189 | 0 | Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); |
19190 | 0 | ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); |
19191 | 0 | } |
19192 | 0 |
19193 | 0 | Value *Result; |
19194 | | |
19195 | | // Must pass the shift amount needed to sign extend the loaded value prior |
19196 | | // to performing a signed comparison for min/max. ShiftAmt is the number of |
19197 | | // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which |
19198 | | // is the number of bits to left+right shift the value in order to |
19199 | | // sign-extend. |
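// As a worked example: with XLen == 64 and an i8 atomicrmw min whose field
// sits at bit offset 16, ShiftAmt is 16 and ValWidth is 8, so SextShamt =
// (64 - 8) - 16 = 40; the expanded LR/SC loop shifts the loaded field left
// by 40 and arithmetically right by 40 to sign-extend it before the signed
// comparison.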
19200 | 0 | if (AI->getOperation() == AtomicRMWInst::Min || |
19201 | 0 | AI->getOperation() == AtomicRMWInst::Max) { |
19202 | 0 | const DataLayout &DL = AI->getModule()->getDataLayout(); |
19203 | 0 | unsigned ValWidth = |
19204 | 0 | DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); |
19205 | 0 | Value *SextShamt = |
19206 | 0 | Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); |
19207 | 0 | Result = Builder.CreateCall(LrwOpScwLoop, |
19208 | 0 | {AlignedAddr, Incr, Mask, SextShamt, Ordering}); |
19209 | 0 | } else { |
19210 | 0 | Result = |
19211 | 0 | Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); |
19212 | 0 | } |
19213 | 0 |
19214 | 0 | if (XLen == 64) |
19215 | 0 | Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); |
19216 | 0 | return Result; |
19217 | 0 | } |
19218 | | |
19219 | | TargetLowering::AtomicExpansionKind |
19220 | | RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( |
19221 | 0 | AtomicCmpXchgInst *CI) const { |
19222 | | // Don't expand forced atomics, we want to have __sync libcalls instead. |
19223 | 0 | if (Subtarget.hasForcedAtomics()) |
19224 | 0 | return AtomicExpansionKind::None; |
19225 | | |
19226 | 0 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
19227 | 0 | if (Size == 8 || Size == 16) |
19228 | 0 | return AtomicExpansionKind::MaskedIntrinsic; |
19229 | 0 | return AtomicExpansionKind::None; |
19230 | 0 | } |
19231 | | |
19232 | | Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( |
19233 | | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
19234 | 0 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
19235 | 0 | unsigned XLen = Subtarget.getXLen(); |
19236 | 0 | Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); |
19237 | 0 | Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; |
19238 | 0 | if (XLen == 64) { |
19239 | 0 | CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); |
19240 | 0 | NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); |
19241 | 0 | Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); |
19242 | 0 | CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; |
19243 | 0 | } |
19244 | 0 | Type *Tys[] = {AlignedAddr->getType()}; |
19245 | 0 | Function *MaskedCmpXchg = |
19246 | 0 | Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); |
19247 | 0 | Value *Result = Builder.CreateCall( |
19248 | 0 | MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); |
19249 | 0 | if (XLen == 64) |
19250 | 0 | Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); |
19251 | 0 | return Result; |
19252 | 0 | } |
19253 | | |
19254 | | bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, |
19255 | 0 | EVT DataVT) const { |
19256 | | // We have indexed loads for all legal index types. Indices are always |
19257 | | // zero extended |
19258 | 0 | return Extend.getOpcode() == ISD::ZERO_EXTEND && |
19259 | 0 | isTypeLegal(Extend.getValueType()) && |
19260 | 0 | isTypeLegal(Extend.getOperand(0).getValueType()); |
19261 | 0 | } |
19262 | | |
19263 | | bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, |
19264 | 0 | EVT VT) const { |
19265 | 0 | if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) |
19266 | 0 | return false; |
19267 | | |
19268 | 0 | switch (FPVT.getSimpleVT().SimpleTy) { |
19269 | 0 | case MVT::f16: |
19270 | 0 | return Subtarget.hasStdExtZfhmin(); |
19271 | 0 | case MVT::f32: |
19272 | 0 | return Subtarget.hasStdExtF(); |
19273 | 0 | case MVT::f64: |
19274 | 0 | return Subtarget.hasStdExtD(); |
19275 | 0 | default: |
19276 | 0 | return false; |
19277 | 0 | } |
19278 | 0 | } |
19279 | | |
19280 | 0 | unsigned RISCVTargetLowering::getJumpTableEncoding() const { |
19281 | | // If we are using the small code model, we can reduce the size of each
19282 | | // jump table entry to 4 bytes.
19283 | 0 | if (Subtarget.is64Bit() && !isPositionIndependent() && |
19284 | 0 | getTargetMachine().getCodeModel() == CodeModel::Small) { |
19285 | 0 | return MachineJumpTableInfo::EK_Custom32; |
19286 | 0 | } |
19287 | 0 | return TargetLowering::getJumpTableEncoding(); |
19288 | 0 | } |
19289 | | |
19290 | | const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( |
19291 | | const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, |
19292 | 0 | unsigned uid, MCContext &Ctx) const { |
19293 | 0 | assert(Subtarget.is64Bit() && !isPositionIndependent() && |
19294 | 0 | getTargetMachine().getCodeModel() == CodeModel::Small); |
19295 | 0 | return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); |
19296 | 0 | } |
19297 | | |
19298 | 0 | bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { |
19299 | | // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power |
19300 | | // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be |
19301 | | // a power of two as well. |
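// For example, VLEN == 128 gives vscale == 128/64 == 2, and VLEN == 512
// gives vscale == 8; both are powers of two.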
19302 | | // FIXME: This doesn't work for zve32, but that's already broken |
19303 | | // elsewhere for the same reason. |
19304 | 0 | assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); |
19305 | 0 | static_assert(RISCV::RVVBitsPerBlock == 64, |
19306 | 0 | "RVVBitsPerBlock changed, audit needed"); |
19307 | 0 | return true; |
19308 | 0 | } |
19309 | | |
19310 | | bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, |
19311 | | SDValue &Offset, |
19312 | | ISD::MemIndexedMode &AM, |
19313 | 0 | SelectionDAG &DAG) const { |
19314 | | // Bail out if the target lacks indexed load/store support (XTHeadMemIdx).
19315 | 0 | if (!Subtarget.hasVendorXTHeadMemIdx()) |
19316 | 0 | return false; |
19317 | | |
19318 | 0 | if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) |
19319 | 0 | return false; |
19320 | | |
19321 | 0 | Base = Op->getOperand(0); |
19322 | 0 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { |
19323 | 0 | int64_t RHSC = RHS->getSExtValue(); |
19324 | 0 | if (Op->getOpcode() == ISD::SUB) |
19325 | 0 | RHSC = -(uint64_t)RHSC; |
19326 | | |
19327 | | // The constants that can be encoded in the THeadMemIdx instructions |
19328 | | // are of the form (sign_extend(imm5) << imm2). |
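// For example, an offset of 48 is accepted (48 == 12 << 2, with 12 a valid
// simm5), whereas 33 is rejected: it neither fits in simm5 directly nor is
// a multiple of 2, 4 or 8.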
19329 | 0 | bool isLegalIndexedOffset = false; |
19330 | 0 | for (unsigned i = 0; i < 4; i++) |
19331 | 0 | if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) { |
19332 | 0 | isLegalIndexedOffset = true; |
19333 | 0 | break; |
19334 | 0 | } |
19335 | 0 |
19336 | 0 | if (!isLegalIndexedOffset) |
19337 | 0 | return false; |
19338 | | |
19339 | 0 | Offset = Op->getOperand(1); |
19340 | 0 | return true; |
19341 | 0 | } |
19342 | | |
19343 | 0 | return false; |
19344 | 0 | } |
19345 | | |
19346 | | bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, |
19347 | | SDValue &Offset, |
19348 | | ISD::MemIndexedMode &AM, |
19349 | 0 | SelectionDAG &DAG) const { |
19350 | 0 | EVT VT; |
19351 | 0 | SDValue Ptr; |
19352 | 0 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { |
19353 | 0 | VT = LD->getMemoryVT(); |
19354 | 0 | Ptr = LD->getBasePtr(); |
19355 | 0 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { |
19356 | 0 | VT = ST->getMemoryVT(); |
19357 | 0 | Ptr = ST->getBasePtr(); |
19358 | 0 | } else |
19359 | 0 | return false; |
19360 | | |
19361 | 0 | if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG)) |
19362 | 0 | return false; |
19363 | | |
19364 | 0 | AM = ISD::PRE_INC; |
19365 | 0 | return true; |
19366 | 0 | } |
19367 | | |
19368 | | bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, |
19369 | | SDValue &Base, |
19370 | | SDValue &Offset, |
19371 | | ISD::MemIndexedMode &AM, |
19372 | 0 | SelectionDAG &DAG) const { |
19373 | 0 | EVT VT; |
19374 | 0 | SDValue Ptr; |
19375 | 0 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { |
19376 | 0 | VT = LD->getMemoryVT(); |
19377 | 0 | Ptr = LD->getBasePtr(); |
19378 | 0 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { |
19379 | 0 | VT = ST->getMemoryVT(); |
19380 | 0 | Ptr = ST->getBasePtr(); |
19381 | 0 | } else |
19382 | 0 | return false; |
19383 | | |
19384 | 0 | if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG)) |
19385 | 0 | return false; |
19386 | | // Post-indexing updates the base, so it's not a valid transform |
19387 | | // if that's not the same as the load's pointer. |
19388 | 0 | if (Ptr != Base) |
19389 | 0 | return false; |
19390 | | |
19391 | 0 | AM = ISD::POST_INC; |
19392 | 0 | return true; |
19393 | 0 | } |
19394 | | |
19395 | | bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
19396 | 7.17k | EVT VT) const { |
19397 | 7.17k | EVT SVT = VT.getScalarType(); |
19398 | | |
19399 | 7.17k | if (!SVT.isSimple()) |
19400 | 0 | return false; |
19401 | | |
19402 | 7.17k | switch (SVT.getSimpleVT().SimpleTy) { |
19403 | 0 | case MVT::f16: |
19404 | 0 | return VT.isVector() ? Subtarget.hasVInstructionsF16() |
19405 | 0 | : Subtarget.hasStdExtZfhOrZhinx(); |
19406 | 4.34k | case MVT::f32: |
19407 | 4.34k | return Subtarget.hasStdExtFOrZfinx(); |
19408 | 2.82k | case MVT::f64: |
19409 | 2.82k | return Subtarget.hasStdExtDOrZdinx(); |
19410 | 0 | default: |
19411 | 0 | break; |
19412 | 7.17k | } |
19413 | | |
19414 | 0 | return false; |
19415 | 7.17k | } |
19416 | | |
19417 | 0 | ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { |
19418 | | // Zacas will use amocas.w which does not require extension. |
19419 | 0 | return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; |
19420 | 0 | } |
19421 | | |
19422 | | Register RISCVTargetLowering::getExceptionPointerRegister( |
19423 | 0 | const Constant *PersonalityFn) const { |
19424 | 0 | return RISCV::X10; |
19425 | 0 | } |
19426 | | |
19427 | | Register RISCVTargetLowering::getExceptionSelectorRegister( |
19428 | 0 | const Constant *PersonalityFn) const { |
19429 | 0 | return RISCV::X11; |
19430 | 0 | } |
19431 | | |
19432 | 91.8k | bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { |
19433 | | // Return false to suppress the unnecessary extensions if the LibCall |
19434 | | // arguments or return value is a float narrower than XLEN on a soft FP ABI. |
19435 | 91.8k | if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && |
19436 | 91.8k | Type.getSizeInBits() < Subtarget.getXLen())) |
19437 | 36.0k | return false; |
19438 | | |
19439 | 55.7k | return true; |
19440 | 91.8k | } |
19441 | | |
19442 | 128k | bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { |
19443 | 128k | if (Subtarget.is64Bit() && Type == MVT::i32) |
19444 | 57.3k | return true; |
19445 | | |
19446 | 71.0k | return IsSigned; |
19447 | 128k | } |
19448 | | |
19449 | | bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, |
19450 | 707 | SDValue C) const { |
19451 | | // Check integral scalar types. |
19452 | 707 | const bool HasExtMOrZmmul = |
19453 | 707 | Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul(); |
19454 | 707 | if (!VT.isScalarInteger()) |
19455 | 0 | return false; |
19456 | | |
19457 | | // Omit the optimization if the subtarget has the M (or Zmmul) extension
19458 | | // and the data size exceeds XLen.
19459 | 707 | if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen()) |
19460 | 0 | return false; |
19461 | | |
19462 | 707 | if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { |
19463 | | // Break the MUL to a SLLI and an ADD/SUB. |
19464 | 707 | const APInt &Imm = ConstNode->getAPIntValue(); |
19465 | 707 | if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || |
19466 | 707 | (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) |
19467 | 394 | return true; |
19468 | | |
19469 | | // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. |
19470 | 313 | if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) && |
19471 | 313 | ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || |
19472 | 0 | (Imm - 8).isPowerOf2())) |
19473 | 0 | return true; |
19474 | | |
19475 | | // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs |
19476 | | // a pair of LUI/ADDI. |
19477 | 313 | if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 && |
19478 | 313 | ConstNode->hasOneUse()) { |
19479 | 4 | APInt ImmS = Imm.ashr(Imm.countr_zero()); |
19480 | 4 | if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || |
19481 | 4 | (1 - ImmS).isPowerOf2()) |
19482 | 1 | return true; |
19483 | 4 | } |
19484 | 313 | } |
19485 | | |
19486 | 312 | return false; |
19487 | 707 | } |
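// For example, mul x, 9 is decomposed because 9 - 1 == 8 is a power of two
// (one shift plus an add), and mul x, 8128 matches the final rule because
// 8128 == 127 << 6 and 127 + 1 is a power of two (two shifts plus a
// subtract).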
19488 | | |
19489 | | bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode, |
19490 | 0 | SDValue ConstNode) const { |
19491 | | // Let the DAGCombiner decide for vectors. |
19492 | 0 | EVT VT = AddNode.getValueType(); |
19493 | 0 | if (VT.isVector()) |
19494 | 0 | return true; |
19495 | | |
19496 | | // Let the DAGCombiner decide for larger types. |
19497 | 0 | if (VT.getScalarSizeInBits() > Subtarget.getXLen()) |
19498 | 0 | return true; |
19499 | | |
19500 | | // It is worse if c1 is simm12 while c1*c2 is not. |
19501 | 0 | ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1)); |
19502 | 0 | ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode); |
19503 | 0 | const APInt &C1 = C1Node->getAPIntValue(); |
19504 | 0 | const APInt &C2 = C2Node->getAPIntValue(); |
19505 | 0 | if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12)) |
19506 | 0 | return false; |
19507 | | |
19508 | | // Default to true and let the DAGCombiner decide. |
19509 | 0 | return true; |
19510 | 0 | } |
19511 | | |
19512 | | bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( |
19513 | | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
19514 | 15.2k | unsigned *Fast) const { |
19515 | 15.2k | if (!VT.isVector()) { |
19516 | 15.2k | if (Fast) |
19517 | 6.34k | *Fast = Subtarget.hasFastUnalignedAccess(); |
19518 | 15.2k | return Subtarget.hasFastUnalignedAccess(); |
19519 | 15.2k | } |
19520 | | |
19521 | | // All vector implementations must support element alignment |
19522 | 0 | EVT ElemVT = VT.getVectorElementType(); |
19523 | 0 | if (Alignment >= ElemVT.getStoreSize()) { |
19524 | 0 | if (Fast) |
19525 | 0 | *Fast = 1; |
19526 | 0 | return true; |
19527 | 0 | } |
19528 | | |
19529 | | // Note: We lower an unmasked unaligned vector access to an equally sized |
19530 | | // e8 element type access. Given this, we effectively support all unmasked |
19531 | | // misaligned accesses. TODO: Work through the codegen implications of |
19532 | | // allowing such accesses to be formed, and considered fast. |
19533 | 0 | if (Fast) |
19534 | 0 | *Fast = Subtarget.hasFastUnalignedAccess(); |
19535 | 0 | return Subtarget.hasFastUnalignedAccess(); |
19536 | 0 | } |
19537 | | |
19538 | | |
19539 | | EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op, |
19540 | 0 | const AttributeList &FuncAttributes) const { |
19541 | 0 | if (!Subtarget.hasVInstructions()) |
19542 | 0 | return MVT::Other; |
19543 | | |
19544 | 0 | if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) |
19545 | 0 | return MVT::Other; |
19546 | | |
19547 | | // We use LMUL1 memory operations here for a non-obvious reason. Our caller |
19548 | | // has an expansion threshold, and we want the number of hardware memory |
19549 | | // operations to correspond roughly to that threshold. LMUL>1 operations |
19550 | | // are typically expanded linearly internally, and thus correspond to more |
19551 | | // than one actual memory operation. Note that store merging and load |
19552 | | // combining will typically form larger LMUL operations from the LMUL1 |
19553 | | // operations emitted here, and that's okay because combining isn't |
19554 | | // introducing new memory operations; it's just merging existing ones. |
19555 | 0 | const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8; |
19556 | 0 | if (Op.size() < MinVLenInBytes) |
19557 | | // TODO: Figure out short memops. For the moment, do the default thing |
19558 | | // which ends up using scalar sequences. |
19559 | 0 | return MVT::Other; |
19560 | | |
19561 | | // Prefer i8 for non-zero memset as it allows us to avoid materializing |
19562 | | // a large scalar constant and instead use vmv.v.x/i to do the |
19563 | | // broadcast. For everything else, prefer ELenVT to minimize VL and thus |
19564 | | // maximize the chance we can encode the size in the vsetvli. |
19565 | 0 | MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen()); |
19566 | 0 | MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT; |
19567 | | |
19568 | | // Do we have sufficient alignment for our preferred VT? If not, revert |
19569 | | // to largest size allowed by our alignment criteria. |
19570 | 0 | if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) { |
19571 | 0 | Align RequiredAlign(PreferredVT.getStoreSize()); |
19572 | 0 | if (Op.isFixedDstAlign()) |
19573 | 0 | RequiredAlign = std::min(RequiredAlign, Op.getDstAlign()); |
19574 | 0 | if (Op.isMemcpy()) |
19575 | 0 | RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign()); |
19576 | 0 | PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8); |
19577 | 0 | } |
19578 | 0 | return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize()); |
19579 | 0 | } |
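// As a sketch, assuming a minimum VLEN of 128 and ELEN == 64: a 32-byte,
// sufficiently aligned memcpy is typed as v2i64 (two LMUL1 operations of
// MinVLenInBytes == 16 each), while a non-zero 32-byte memset is typed as
// v16i8 so the fill value can be splatted with vmv.v.x.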
19580 | | |
19581 | | bool RISCVTargetLowering::splitValueIntoRegisterParts( |
19582 | | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
19583 | 117k | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
19584 | 117k | bool IsABIRegCopy = CC.has_value(); |
19585 | 117k | EVT ValueVT = Val.getValueType(); |
19586 | 117k | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
19587 | 117k | PartVT == MVT::f32) { |
19588 | | // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float |
19589 | | // nan, and cast to f32. |
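// E.g. the f16 value 1.0 (bit pattern 0x3C00) is passed as the NaN-boxed
// f32 bit pattern 0xFFFF3C00.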
19590 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val); |
19591 | 0 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val); |
19592 | 0 | Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val, |
19593 | 0 | DAG.getConstant(0xFFFF0000, DL, MVT::i32)); |
19594 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); |
19595 | 0 | Parts[0] = Val; |
19596 | 0 | return true; |
19597 | 0 | } |
19598 | | |
19599 | 117k | if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { |
19600 | 0 | LLVMContext &Context = *DAG.getContext(); |
19601 | 0 | EVT ValueEltVT = ValueVT.getVectorElementType(); |
19602 | 0 | EVT PartEltVT = PartVT.getVectorElementType(); |
19603 | 0 | unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); |
19604 | 0 | unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); |
19605 | 0 | if (PartVTBitSize % ValueVTBitSize == 0) { |
19606 | 0 | assert(PartVTBitSize >= ValueVTBitSize); |
19607 | | // If the element types are different, bitcast to the same element type of |
19608 | | // PartVT first. |
19609 | | // For example, to copy a <vscale x 1 x i8> value into
19610 | | // <vscale x 4 x i16>, we first widen <vscale x 1 x i8> to
19611 | | // <vscale x 8 x i8> with an insert_subvector, and then bitcast the
19612 | | // result to <vscale x 4 x i16>.
19613 | 0 | if (ValueEltVT != PartEltVT) { |
19614 | 0 | if (PartVTBitSize > ValueVTBitSize) { |
19615 | 0 | unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); |
19616 | 0 | assert(Count != 0 && "The number of element should not be zero."); |
19617 | 0 | EVT SameEltTypeVT = |
19618 | 0 | EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true); |
19619 | 0 | Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT, |
19620 | 0 | DAG.getUNDEF(SameEltTypeVT), Val, |
19621 | 0 | DAG.getVectorIdxConstant(0, DL)); |
19622 | 0 | } |
19623 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); |
19624 | 0 | } else { |
19625 | 0 | Val = |
19626 | 0 | DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), |
19627 | 0 | Val, DAG.getVectorIdxConstant(0, DL)); |
19628 | 0 | } |
19629 | 0 | Parts[0] = Val; |
19630 | 0 | return true; |
19631 | 0 | } |
19632 | 0 | } |
19633 | 117k | return false; |
19634 | 117k | } |
19635 | | |
19636 | | SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( |
19637 | | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, |
19638 | 103k | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { |
19639 | 103k | bool IsABIRegCopy = CC.has_value(); |
19640 | 103k | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
19641 | 103k | PartVT == MVT::f32) { |
19642 | 0 | SDValue Val = Parts[0]; |
19643 | | |
19644 | | // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. |
19645 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); |
19646 | 0 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val); |
19647 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); |
19648 | 0 | return Val; |
19649 | 0 | } |
19650 | | |
19651 | 103k | if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { |
19652 | 0 | LLVMContext &Context = *DAG.getContext(); |
19653 | 0 | SDValue Val = Parts[0]; |
19654 | 0 | EVT ValueEltVT = ValueVT.getVectorElementType(); |
19655 | 0 | EVT PartEltVT = PartVT.getVectorElementType(); |
19656 | 0 | unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue(); |
19657 | 0 | unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue(); |
19658 | 0 | if (PartVTBitSize % ValueVTBitSize == 0) { |
19659 | 0 | assert(PartVTBitSize >= ValueVTBitSize); |
19660 | 0 | EVT SameEltTypeVT = ValueVT; |
19661 | | // If the element types are different, convert it to the same element type |
19662 | | // of PartVT. |
19663 | | // For example, to copy a <vscale x 1 x i8> value out of
19664 | | // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
19665 | | // <vscale x 8 x i8>, and then extract the <vscale x 1 x i8>
19666 | | // subvector.
19667 | 0 | if (ValueEltVT != PartEltVT) { |
19668 | 0 | unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); |
19669 | 0 | assert(Count != 0 && "The number of element should not be zero."); |
19670 | 0 | SameEltTypeVT = |
19671 | 0 | EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true); |
19672 | 0 | Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val); |
19673 | 0 | } |
19674 | 0 | Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, |
19675 | 0 | DAG.getVectorIdxConstant(0, DL)); |
19676 | 0 | return Val; |
19677 | 0 | } |
19678 | 0 | } |
19679 | 103k | return SDValue(); |
19680 | 103k | } |
19681 | | |
19682 | 12.0k | bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { |
19683 | | // When aggressively optimizing for code size, we prefer to use a div |
19684 | | // instruction, as it is usually smaller than the alternative sequence. |
19685 | | // TODO: Add vector division? |
19686 | 12.0k | bool OptSize = Attr.hasFnAttr(Attribute::MinSize); |
19687 | 12.0k | return OptSize && !VT.isVector(); |
19688 | 12.0k | } |
19689 | | |
19690 | 0 | bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const { |
19691 | | // Scalarize zero_ext and sign_ext might stop match to widening instruction in |
19692 | | // some situation. |
19693 | 0 | unsigned Opc = N->getOpcode(); |
19694 | 0 | if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND) |
19695 | 0 | return false; |
19696 | 0 | return true; |
19697 | 0 | } |
19698 | | |
19699 | 0 | static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) { |
19700 | 0 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
19701 | 0 | Function *ThreadPointerFunc = |
19702 | 0 | Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); |
19703 | 0 | return IRB.CreateConstGEP1_32(IRB.getInt8Ty(), |
19704 | 0 | IRB.CreateCall(ThreadPointerFunc), Offset); |
19705 | 0 | } |
19706 | | |
19707 | 0 | Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { |
19708 | | // Fuchsia provides a fixed TLS slot for the stack cookie. |
19709 | | // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value. |
19710 | 0 | if (Subtarget.isTargetFuchsia()) |
19711 | 0 | return useTpOffset(IRB, -0x10); |
19712 | | |
19713 | 0 | return TargetLowering::getIRStackGuard(IRB); |
19714 | 0 | } |
19715 | | |
19716 | | bool RISCVTargetLowering::isLegalInterleavedAccessType( |
19717 | | VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, |
19718 | 0 | const DataLayout &DL) const { |
19719 | 0 | EVT VT = getValueType(DL, VTy); |
19720 | | // Don't lower vlseg/vsseg for vector types that can't be split. |
19721 | 0 | if (!isTypeLegal(VT)) |
19722 | 0 | return false; |
19723 | | |
19724 | 0 | if (!isLegalElementTypeForRVV(VT.getScalarType()) || |
19725 | 0 | !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace, |
19726 | 0 | Alignment)) |
19727 | 0 | return false; |
19728 | | |
19729 | 0 | MVT ContainerVT = VT.getSimpleVT(); |
19730 | 0 |
19731 | 0 | if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { |
19732 | 0 | if (!Subtarget.useRVVForFixedLengthVectors()) |
19733 | 0 | return false; |
19734 | | // Sometimes the interleaved access pass picks up splats as interleaves of |
19735 | | // one element. Don't lower these. |
19736 | 0 | if (FVTy->getNumElements() < 2) |
19737 | 0 | return false; |
19738 | | |
19739 | 0 | ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT()); |
19740 | 0 | } |
19741 | | |
19742 | | // Need to make sure that EMUL * NFIELDS ≤ 8 |
19743 | 0 | auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT)); |
19744 | 0 | if (Fractional) |
19745 | 0 | return true; |
19746 | 0 | return Factor * LMUL <= 8; |
19747 | 0 | } |
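// For example, with <vscale x 4 x i32> (LMUL == 2) a factor of 4 is legal
// (2 * 4 == 8) but a factor of 8 is not (2 * 8 > 8).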
19748 | | |
19749 | | bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, |
19750 | 0 | Align Alignment) const { |
19751 | 0 | if (!Subtarget.hasVInstructions()) |
19752 | 0 | return false; |
19753 | | |
19754 | | // Only support fixed vectors if we know the minimum vector size. |
19755 | 0 | if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors()) |
19756 | 0 | return false; |
19757 | | |
19758 | 0 | EVT ScalarType = DataType.getScalarType(); |
19759 | 0 | if (!isLegalElementTypeForRVV(ScalarType)) |
19760 | 0 | return false; |
19761 | | |
19762 | 0 | if (!Subtarget.hasFastUnalignedAccess() && |
19763 | 0 | Alignment < ScalarType.getStoreSize()) |
19764 | 0 | return false; |
19765 | | |
19766 | 0 | return true; |
19767 | 0 | } |
19768 | | |
19769 | | static const Intrinsic::ID FixedVlsegIntrIds[] = { |
19770 | | Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load, |
19771 | | Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load, |
19772 | | Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load, |
19773 | | Intrinsic::riscv_seg8_load}; |
19774 | | |
19775 | | /// Lower an interleaved load into a vlsegN intrinsic. |
19776 | | /// |
19777 | | /// E.g. Lower an interleaved load (Factor = 2): |
19778 | | /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr |
19779 | | /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements |
19780 | | /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements |
19781 | | /// |
19782 | | /// Into: |
19783 | | /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64( |
19784 | | /// %ptr, i64 4) |
19785 | | /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 |
19786 | | /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 |
19787 | | bool RISCVTargetLowering::lowerInterleavedLoad( |
19788 | | LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, |
19789 | 0 | ArrayRef<unsigned> Indices, unsigned Factor) const { |
19790 | 0 | IRBuilder<> Builder(LI); |
19791 | 0 |
19792 | 0 | auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType()); |
19793 | 0 | if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(), |
19794 | 0 | LI->getPointerAddressSpace(), |
19795 | 0 | LI->getModule()->getDataLayout())) |
19796 | 0 | return false; |
19797 | | |
19798 | 0 | auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); |
19799 | 0 |
19800 | 0 | Function *VlsegNFunc = |
19801 | 0 | Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2], |
19802 | 0 | {VTy, LI->getPointerOperandType(), XLenTy}); |
19803 | 0 |
19804 | 0 | Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); |
19805 | 0 |
19806 | 0 | CallInst *VlsegN = |
19807 | 0 | Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL}); |
19808 | 0 |
19809 | 0 | for (unsigned i = 0; i < Shuffles.size(); i++) { |
19810 | 0 | Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); |
19811 | 0 | Shuffles[i]->replaceAllUsesWith(SubVec); |
19812 | 0 | } |
19813 | 0 |
19814 | 0 | return true; |
19815 | 0 | } |
19816 | | |
19817 | | static const Intrinsic::ID FixedVssegIntrIds[] = { |
19818 | | Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store, |
19819 | | Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store, |
19820 | | Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store, |
19821 | | Intrinsic::riscv_seg8_store}; |
19822 | | |
19823 | | /// Lower an interleaved store into a vssegN intrinsic. |
19824 | | /// |
19825 | | /// E.g. Lower an interleaved store (Factor = 3): |
19826 | | /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, |
19827 | | /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> |
19828 | | /// store <12 x i32> %i.vec, <12 x i32>* %ptr |
19829 | | /// |
19830 | | /// Into: |
19831 | | /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> |
19832 | | /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> |
19833 | | /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> |
19834 | | /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2, |
19835 | | /// %ptr, i32 4) |
19836 | | /// |
19837 | | /// Note that the new shufflevectors will be removed and we'll only generate one |
19838 | | /// vsseg3 instruction in CodeGen. |
19839 | | bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, |
19840 | | ShuffleVectorInst *SVI, |
19841 | 0 | unsigned Factor) const { |
19842 | 0 | IRBuilder<> Builder(SI); |
19843 | 0 | auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType()); |
19844 | | // Given SVI : <n*factor x ty>, then VTy : <n x ty> |
19845 | 0 | auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(), |
19846 | 0 | ShuffleVTy->getNumElements() / Factor); |
19847 | 0 | if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(), |
19848 | 0 | SI->getPointerAddressSpace(), |
19849 | 0 | SI->getModule()->getDataLayout())) |
19850 | 0 | return false; |
19851 | | |
19852 | 0 | auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); |
19853 | 0 |
19854 | 0 | Function *VssegNFunc = |
19855 | 0 | Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2], |
19856 | 0 | {VTy, SI->getPointerOperandType(), XLenTy}); |
19857 | 0 |
19858 | 0 | auto Mask = SVI->getShuffleMask(); |
19859 | 0 | SmallVector<Value *, 10> Ops; |
19860 | 0 |
19861 | 0 | for (unsigned i = 0; i < Factor; i++) { |
19862 | 0 | Value *Shuffle = Builder.CreateShuffleVector( |
19863 | 0 | SVI->getOperand(0), SVI->getOperand(1), |
19864 | 0 | createSequentialMask(Mask[i], VTy->getNumElements(), 0)); |
19865 | 0 | Ops.push_back(Shuffle); |
19866 | 0 | } |
19867 | | // This VL should be OK (should be executable in one vsseg instruction, |
19868 | | // potentially under larger LMULs) because we checked that the fixed vector |
19869 | | // type fits in isLegalInterleavedAccessType |
19870 | 0 | Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); |
19871 | 0 | Ops.append({SI->getPointerOperand(), VL}); |
19872 | 0 |
19873 | 0 | Builder.CreateCall(VssegNFunc, Ops); |
19874 | 0 |
19875 | 0 | return true; |
19876 | 0 | } |
19877 | | |
19878 | | bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, |
19879 | 0 | LoadInst *LI) const { |
19880 | 0 | assert(LI->isSimple()); |
19881 | 0 | IRBuilder<> Builder(LI); |
19882 | | |
19883 | | // Only deinterleave2 supported at present. |
19884 | 0 | if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2) |
19885 | 0 | return false; |
19886 | | |
19887 | 0 | unsigned Factor = 2; |
19888 | 0 |
19889 | 0 | VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType()); |
19890 | 0 | VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0)); |
19891 | 0 |
19892 | 0 | if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), |
19893 | 0 | LI->getPointerAddressSpace(), |
19894 | 0 | LI->getModule()->getDataLayout())) |
19895 | 0 | return false; |
19896 | | |
19897 | 0 | Function *VlsegNFunc; |
19898 | 0 | Value *VL; |
19899 | 0 | Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); |
19900 | 0 | SmallVector<Value *, 10> Ops; |
19901 | 0 |
19902 | 0 | if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { |
19903 | 0 | VlsegNFunc = Intrinsic::getDeclaration( |
19904 | 0 | LI->getModule(), FixedVlsegIntrIds[Factor - 2], |
19905 | 0 | {ResVTy, LI->getPointerOperandType(), XLenTy}); |
19906 | 0 | VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); |
19907 | 0 | } else { |
19908 | 0 | static const Intrinsic::ID IntrIds[] = { |
19909 | 0 | Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, |
19910 | 0 | Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, |
19911 | 0 | Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, |
19912 | 0 | Intrinsic::riscv_vlseg8}; |
19913 | 0 |
19914 | 0 | VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2], |
19915 | 0 | {ResVTy, XLenTy}); |
19916 | 0 | VL = Constant::getAllOnesValue(XLenTy); |
19917 | 0 | Ops.append(Factor, PoisonValue::get(ResVTy)); |
19918 | 0 | } |
19919 | 0 |
19920 | 0 | Ops.append({LI->getPointerOperand(), VL}); |
19921 | 0 |
19922 | 0 | Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops); |
19923 | 0 | DI->replaceAllUsesWith(Vlseg); |
19924 | 0 |
19925 | 0 | return true; |
19926 | 0 | } |
19927 | | |
19928 | | bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
19929 | 0 | StoreInst *SI) const { |
19930 | 0 | assert(SI->isSimple()); |
19931 | 0 | IRBuilder<> Builder(SI); |
19932 | | |
19933 | | // Only interleave2 supported at present. |
19934 | 0 | if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2) |
19935 | 0 | return false; |
19936 | | |
19937 | 0 | unsigned Factor = 2; |
19938 | 0 |
19939 | 0 | VectorType *VTy = cast<VectorType>(II->getType()); |
19940 | 0 | VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType()); |
19941 | 0 |
19942 | 0 | if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(), |
19943 | 0 | SI->getPointerAddressSpace(), |
19944 | 0 | SI->getModule()->getDataLayout())) |
19945 | 0 | return false; |
19946 | | |
19947 | 0 | Function *VssegNFunc; |
19948 | 0 | Value *VL; |
19949 | 0 | Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); |
19950 | 0 |
19951 | 0 | if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { |
19952 | 0 | VssegNFunc = Intrinsic::getDeclaration( |
19953 | 0 | SI->getModule(), FixedVssegIntrIds[Factor - 2], |
19954 | 0 | {InVTy, SI->getPointerOperandType(), XLenTy}); |
19955 | 0 | VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); |
19956 | 0 | } else { |
19957 | 0 | static const Intrinsic::ID IntrIds[] = { |
19958 | 0 | Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, |
19959 | 0 | Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, |
19960 | 0 | Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, |
19961 | 0 | Intrinsic::riscv_vsseg8}; |
19962 | 0 |
19963 | 0 | VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2], |
19964 | 0 | {InVTy, XLenTy}); |
19965 | 0 | VL = Constant::getAllOnesValue(XLenTy); |
19966 | 0 | } |
19967 | 0 |
19968 | 0 | Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1), |
19969 | 0 | SI->getPointerOperand(), VL}); |
19970 | 0 |
19971 | 0 | return true; |
19972 | 0 | } |
19973 | | |
19974 | | MachineInstr * |
19975 | | RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, |
19976 | | MachineBasicBlock::instr_iterator &MBBI, |
19977 | 0 | const TargetInstrInfo *TII) const { |
19978 | 0 | assert(MBBI->isCall() && MBBI->getCFIType() && |
19979 | 0 | "Invalid call instruction for a KCFI check"); |
19980 | 0 | assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect}, |
19981 | 0 | MBBI->getOpcode())); |
19982 | | |
19983 | 0 | MachineOperand &Target = MBBI->getOperand(0); |
19984 | 0 | Target.setIsRenamable(false); |
19985 | 0 |
19986 | 0 | return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK)) |
19987 | 0 | .addReg(Target.getReg()) |
19988 | 0 | .addImm(MBBI->getCFIType()) |
19989 | 0 | .getInstr(); |
19990 | 0 | } |
19991 | | |
19992 | | #define GET_REGISTER_MATCHER |
19993 | | #include "RISCVGenAsmMatcher.inc" |
19994 | | |
19995 | | Register |
19996 | | RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
19997 | 0 | const MachineFunction &MF) const { |
19998 | 0 | Register Reg = MatchRegisterAltName(RegName); |
19999 | 0 | if (Reg == RISCV::NoRegister) |
20000 | 0 | Reg = MatchRegisterName(RegName); |
20001 | 0 | if (Reg == RISCV::NoRegister) |
20002 | 0 | report_fatal_error( |
20003 | 0 | Twine("Invalid register name \"" + StringRef(RegName) + "\".")); |
20004 | 0 | BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); |
20005 | 0 | if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) |
20006 | 0 | report_fatal_error(Twine("Trying to obtain non-reserved register \"" + |
20007 | 0 | StringRef(RegName) + "\".")); |
20008 | 0 | return Reg; |
20009 | 0 | } |
20010 | | |
20011 | | MachineMemOperand::Flags |
20012 | 233k | RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const { |
20013 | 233k | const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal); |
20014 | | |
20015 | 233k | if (NontemporalInfo == nullptr) |
20016 | 233k | return MachineMemOperand::MONone; |
20017 | | |
20018 | | // 1 -> the default value; treated the same as __RISCV_NTLH_ALL
20019 | | // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE |
20020 | | // 3 -> __RISCV_NTLH_ALL_PRIVATE |
20021 | | // 4 -> __RISCV_NTLH_INNERMOST_SHARED |
20022 | | // 5 -> __RISCV_NTLH_ALL |
20023 | 0 | int NontemporalLevel = 5; |
20024 | 0 | const MDNode *RISCVNontemporalInfo = |
20025 | 0 | I.getMetadata("riscv-nontemporal-domain"); |
20026 | 0 | if (RISCVNontemporalInfo != nullptr) |
20027 | 0 | NontemporalLevel = |
20028 | 0 | cast<ConstantInt>( |
20029 | 0 | cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0)) |
20030 | 0 | ->getValue()) |
20031 | 0 | ->getZExtValue(); |
20032 | 0 |
20033 | 0 | assert((1 <= NontemporalLevel && NontemporalLevel <= 5) && |
20034 | 0 | "RISC-V target doesn't support this non-temporal domain."); |
20035 | | |
20036 | 0 | NontemporalLevel -= 2; |
20037 | 0 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone; |
20038 | 0 | if (NontemporalLevel & 0b1) |
20039 | 0 | Flags |= MONontemporalBit0; |
20040 | 0 | if (NontemporalLevel & 0b10) |
20041 | 0 | Flags |= MONontemporalBit1; |
20042 | 0 |
20043 | 0 | return Flags; |
20044 | 233k | } |
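// For illustration, a store in the __RISCV_NTLH_ALL_PRIVATE domain might
// carry metadata along the lines of
//   store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 3}
// which maps to NontemporalLevel 3 above and sets only MONontemporalBit0;
// !nontemporal with no domain metadata defaults to level 5 and sets both
// bits.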
20045 | | |
20046 | | MachineMemOperand::Flags |
20047 | 314k | RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const { |
20048 | | |
20049 | 314k | MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags(); |
20050 | 314k | MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone; |
20051 | 314k | TargetFlags |= (NodeFlags & MONontemporalBit0); |
20052 | 314k | TargetFlags |= (NodeFlags & MONontemporalBit1); |
20053 | | |
20054 | 314k | return TargetFlags; |
20055 | 314k | } |
20056 | | |
20057 | | bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable( |
20058 | 157k | const MemSDNode &NodeX, const MemSDNode &NodeY) const { |
20059 | 157k | return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY); |
20060 | 157k | } |
20061 | | |
20062 | 0 | bool RISCVTargetLowering::isCtpopFast(EVT VT) const { |
20063 | 0 | if (VT.isScalableVector()) |
20064 | 0 | return isTypeLegal(VT) && Subtarget.hasStdExtZvbb(); |
20065 | 0 | if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb()) |
20066 | 0 | return true; |
20067 | 0 | return Subtarget.hasStdExtZbb() && |
20068 | 0 | (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector()); |
20069 | 0 | } |
20070 | | |
20071 | | unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, |
20072 | 0 | ISD::CondCode Cond) const { |
20073 | 0 | return isCtpopFast(VT) ? 0 : 1; |
20074 | 0 | } |
20075 | | |
20076 | 0 | bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { |
20077 | | |
20078 | | // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and |
20079 | | // G_XOR. |
20080 | 0 | unsigned Op = Inst.getOpcode(); |
20081 | 0 | if (Op == Instruction::Add || Op == Instruction::Sub || |
20082 | 0 | Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor) |
20083 | 0 | return false; |
20084 | | |
20085 | 0 | if (Inst.getType()->isScalableTy()) |
20086 | 0 | return true; |
20087 | | |
20088 | 0 | for (unsigned i = 0; i < Inst.getNumOperands(); ++i) |
20089 | 0 | if (Inst.getOperand(i)->getType()->isScalableTy() && |
20090 | 0 | !isa<ReturnInst>(&Inst)) |
20091 | 0 | return true; |
20092 | | |
20093 | 0 | if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) { |
20094 | 0 | if (AI->getAllocatedType()->isScalableTy()) |
20095 | 0 | return true; |
20096 | 0 | } |
20097 | | |
20098 | 0 | return false; |
20099 | 0 | } |
20100 | | |
20101 | | SDValue |
20102 | | RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
20103 | | SelectionDAG &DAG, |
20104 | 295 | SmallVectorImpl<SDNode *> &Created) const { |
20105 | 295 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
20106 | 295 | if (isIntDivCheap(N->getValueType(0), Attr)) |
20107 | 0 | return SDValue(N, 0); // Lower SDIV as SDIV |
20108 | | |
20109 | | // Only perform this transform if short forward branch opt is supported. |
20110 | 295 | if (!Subtarget.hasShortForwardBranchOpt()) |
20111 | 295 | return SDValue(); |
20112 | 0 | EVT VT = N->getValueType(0); |
20113 | 0 | if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) |
20114 | 0 | return SDValue(); |
20115 | | |
20116 | | // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw. |
20117 | 0 | if (Divisor.sgt(2048) || Divisor.slt(-2048)) |
20118 | 0 | return SDValue(); |
20119 | 0 | return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created); |
20120 | 0 | } |
20121 | | |
20122 | | bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest( |
20123 | 0 | EVT VT, const APInt &AndMask) const { |
20124 | 0 | if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) |
20125 | 0 | return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024); |
20126 | 0 | return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask); |
20127 | 0 | } |
20128 | | |
20129 | 0 | unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const { |
20130 | 0 | return Subtarget.getMinimumJumpTableEntries(); |
20131 | 0 | } |
20132 | | |
20133 | | namespace llvm::RISCVVIntrinsicsTable { |
20134 | | |
20135 | | #define GET_RISCVVIntrinsicsTable_IMPL |
20136 | | #include "RISCVGenSearchableTables.inc" |
20137 | | |
20138 | | } // namespace llvm::RISCVVIntrinsicsTable |