/src/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
Line | Count | Source
1 | | //===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | /// \file |
8 | | /// This file implements a TargetTransformInfo analysis pass specific to the |
9 | | /// Hexagon target machine. It uses the target's detailed information to provide |
10 | | /// more precise answers to certain TTI queries, while letting the target |
11 | | /// independent and default TTI implementations handle the rest. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "HexagonTargetTransformInfo.h" |
16 | | #include "HexagonSubtarget.h" |
17 | | #include "llvm/Analysis/TargetTransformInfo.h" |
18 | | #include "llvm/CodeGen/ValueTypes.h" |
19 | | #include "llvm/IR/InstrTypes.h" |
20 | | #include "llvm/IR/Instructions.h" |
21 | | #include "llvm/IR/User.h" |
22 | | #include "llvm/Support/Casting.h" |
23 | | #include "llvm/Support/CommandLine.h" |
24 | | #include "llvm/Transforms/Utils/LoopPeel.h" |
25 | | #include "llvm/Transforms/Utils/UnrollLoop.h" |
26 | | |
27 | | using namespace llvm; |
28 | | |
29 | | #define DEBUG_TYPE "hexagontti" |
30 | | |
31 | | static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false), |
32 | | cl::Hidden, cl::desc("Enable loop vectorizer for HVX")); |
33 | | |
34 | | static cl::opt<bool> EnableV68FloatAutoHVX( |
35 | | "force-hvx-float", cl::Hidden, |
36 | | cl::desc("Enable auto-vectorization of floating point types on v68."));
37 | | |
38 | | static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables", |
39 | | cl::init(true), cl::Hidden, |
40 | | cl::desc("Control lookup table emission on Hexagon target")); |
41 | | |
42 | | static cl::opt<bool> HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true), |
43 | | cl::Hidden, cl::desc("Enable masked loads/stores for HVX")); |
44 | | |
45 | | // Constant "cost factor" to make floating point operations more expensive |
46 | | // in terms of vectorization cost. This isn't the best way, but it should |
47 | | // do. Ultimately, the cost should use cycles. |
48 | | static const unsigned FloatFactor = 4; |
49 | | |
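 | | // HVX auto-vectorization is opt-in: this returns true only when the
 | | // subtarget has HVX ops and -hexagon-autohvx is passed (the flag above
 | | // defaults to off).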
50 | 0 | bool HexagonTTIImpl::useHVX() const { |
51 | 0 | return ST.useHVXOps() && HexagonAutoHVX; |
52 | 0 | } |
53 | | |
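 | | // A type counts as an HVX vector type if the subtarget accepts it for HVX;
 | | // floating-point element types are additionally gated on v69+, or on v68
 | | // together with the -force-hvx-float flag.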
54 | 0 | bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const { |
55 | 0 | auto *VecTy = dyn_cast<VectorType>(Ty); |
56 | 0 | if (!VecTy) |
57 | 0 | return false; |
58 | 0 | if (!ST.isTypeForHVX(VecTy)) |
59 | 0 | return false; |
60 | 0 | if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy()) |
61 | 0 | return true; |
62 | 0 | return ST.useHVXV68Ops() && EnableV68FloatAutoHVX; |
63 | 0 | } |
64 | | |
65 | 0 | unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const { |
66 | 0 | if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) |
67 | 0 | return VTy->getNumElements(); |
68 | 0 | assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) && |
69 | 0 | "Expecting scalar type"); |
70 | 0 | return 1; |
71 | 0 | } |
72 | | |
73 | | TargetTransformInfo::PopcntSupportKind |
74 | 0 | HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { |
75 | | // Return fast hardware support as every input < 64 bits will be promoted |
76 | | // to 64 bits. |
77 | 0 | return TargetTransformInfo::PSK_FastHardware; |
78 | 0 | } |
79 | | |
80 | | // The Hexagon target can unroll loops with run-time trip counts. |
81 | | void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
82 | | TTI::UnrollingPreferences &UP, |
83 | 0 | OptimizationRemarkEmitter *ORE) { |
84 | 0 | UP.Runtime = UP.Partial = true; |
85 | 0 | } |
86 | | |
87 | | void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
88 | 0 | TTI::PeelingPreferences &PP) { |
89 | 0 | BaseT::getPeelingPreferences(L, SE, PP); |
90 | | // Only try to peel innermost loops with small runtime trip counts. |
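 | | // That is: no constant trip count is known (== 0), but the maximum trip
 | | // count is known and does not exceed 5.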
91 | 0 | if (L && L->isInnermost() && canPeel(L) && |
92 | 0 | SE.getSmallConstantTripCount(L) == 0 && |
93 | 0 | SE.getSmallConstantMaxTripCount(L) > 0 && |
94 | 0 | SE.getSmallConstantMaxTripCount(L) <= 5) { |
95 | 0 | PP.PeelCount = 2; |
96 | 0 | } |
97 | 0 | } |
98 | | |
99 | | TTI::AddressingModeKind |
100 | | HexagonTTIImpl::getPreferredAddressingMode(const Loop *L, |
101 | 1.99k | ScalarEvolution *SE) const { |
102 | 1.99k | return TTI::AMK_PostIndexed; |
103 | 1.99k | } |
104 | | |
105 | | /// --- Vector TTI begin --- |
106 | | |
107 | 0 | unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const { |
108 | 0 | if (Vector) |
109 | 0 | return useHVX() ? 32 : 0; |
110 | 0 | return 32; |
111 | 0 | } |
112 | | |
113 | 0 | unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) { |
114 | 0 | return useHVX() ? 2 : 1; |
115 | 0 | } |
116 | | |
117 | | TypeSize |
118 | 0 | HexagonTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { |
119 | 0 | switch (K) { |
120 | 0 | case TargetTransformInfo::RGK_Scalar: |
121 | 0 | return TypeSize::getFixed(32); |
122 | 0 | case TargetTransformInfo::RGK_FixedWidthVector: |
123 | 0 | return TypeSize::getFixed(getMinVectorRegisterBitWidth()); |
124 | 0 | case TargetTransformInfo::RGK_ScalableVector: |
125 | 0 | return TypeSize::getScalable(0); |
126 | 0 | } |
127 | | |
128 | 0 | llvm_unreachable("Unsupported register kind"); |
129 | 0 | } |
130 | | |
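 | | // With HVX enabled the register width follows the configured vector length
 | | // (getVectorLength() is in bytes, hence the *8); otherwise fall back to the
 | | // 32-bit scalar registers.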
131 | 0 | unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const { |
132 | 0 | return useHVX() ? ST.getVectorLength()*8 : 32; |
133 | 0 | } |
134 | | |
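 | | // Illustrative: with 128-byte HVX vectors this asks the vectorizer for at
 | | // least 1024 / ElemWidth lanes, e.g. a minimum VF of 32 for i32 elements.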
135 | | ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth, |
136 | 0 | bool IsScalable) const { |
137 | 0 | assert(!IsScalable && "Scalable VFs are not supported for Hexagon"); |
138 | 0 | return ElementCount::getFixed((8 * ST.getVectorLength()) / ElemWidth); |
139 | 0 | } |
140 | | |
141 | | InstructionCost HexagonTTIImpl::getScalarizationOverhead( |
142 | | VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, |
143 | 0 | TTI::TargetCostKind CostKind) { |
144 | 0 | return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, |
145 | 0 | CostKind); |
146 | 0 | } |
147 | | |
148 | | InstructionCost |
149 | | HexagonTTIImpl::getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
150 | | ArrayRef<Type *> Tys, |
151 | 0 | TTI::TargetCostKind CostKind) { |
152 | 0 | return BaseT::getOperandsScalarizationOverhead(Args, Tys, CostKind); |
153 | 0 | } |
154 | | |
155 | | InstructionCost HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy, |
156 | | ArrayRef<Type *> Tys, |
157 | 0 | TTI::TargetCostKind CostKind) { |
158 | 0 | return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind); |
159 | 0 | } |
160 | | |
161 | | InstructionCost |
162 | | HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
163 | 0 | TTI::TargetCostKind CostKind) { |
164 | 0 | if (ICA.getID() == Intrinsic::bswap) { |
165 | 0 | std::pair<InstructionCost, MVT> LT = |
166 | 0 | getTypeLegalizationCost(ICA.getReturnType()); |
167 | 0 | return LT.first + 2; |
168 | 0 | } |
169 | 0 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
170 | 0 | } |
171 | | |
172 | | InstructionCost HexagonTTIImpl::getAddressComputationCost(Type *Tp, |
173 | | ScalarEvolution *SE, |
174 | 0 | const SCEV *S) { |
175 | 0 | return 0; |
176 | 0 | } |
177 | | |
178 | | InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, |
179 | | MaybeAlign Alignment, |
180 | | unsigned AddressSpace, |
181 | | TTI::TargetCostKind CostKind, |
182 | | TTI::OperandValueInfo OpInfo, |
183 | 35 | const Instruction *I) { |
184 | 35 | assert(Opcode == Instruction::Load || Opcode == Instruction::Store); |
185 | | // TODO: Handle other cost kinds. |
186 | 35 | if (CostKind != TTI::TCK_RecipThroughput) |
187 | 35 | return 1; |
188 | | |
189 | 0 | if (Opcode == Instruction::Store) |
190 | 0 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
191 | 0 | CostKind, OpInfo, I); |
192 | | |
193 | 0 | if (Src->isVectorTy()) { |
194 | 0 | VectorType *VecTy = cast<VectorType>(Src); |
195 | 0 | unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedValue(); |
196 | 0 | if (isHVXVectorType(VecTy)) { |
197 | 0 | unsigned RegWidth = |
198 | 0 | getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) |
199 | 0 | .getFixedValue(); |
200 | 0 | assert(RegWidth && "Non-zero vector register width expected"); |
201 | | // Cost of HVX loads. |
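 | | // (e.g. a vector spanning two full HVX registers costs 2.)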
202 | 0 | if (VecWidth % RegWidth == 0) |
203 | 0 | return VecWidth / RegWidth; |
204 | | // Cost of constructing HVX vector from scalar loads |
205 | 0 | const Align RegAlign(RegWidth / 8); |
206 | 0 | if (!Alignment || *Alignment > RegAlign) |
207 | 0 | Alignment = RegAlign; |
208 | 0 | assert(Alignment); |
209 | 0 | unsigned AlignWidth = 8 * Alignment->value(); |
210 | 0 | unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; |
211 | 0 | return 3 * NumLoads; |
212 | 0 | } |
213 | | |
214 | | // Non-HVX vectors. |
215 | | // Add extra cost for floating point types. |
216 | 0 | unsigned Cost = |
217 | 0 | VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1; |
218 | | |
219 | | // At this point unspecified alignment is considered as Align(1). |
220 | 0 | const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8)); |
221 | 0 | unsigned AlignWidth = 8 * BoundAlignment.value(); |
222 | 0 | unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; |
223 | 0 | if (Alignment == Align(4) || Alignment == Align(8)) |
224 | 0 | return Cost * NumLoads; |
225 | | // Loads of less than 32 bits will need extra inserts to compose a vector. |
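 | | // Illustrative arithmetic (hypothetical v4f32 load with 2-byte alignment):
 | | // Cost = FloatFactor = 4, AlignWidth = 16, NumLoads = 8, LogA = 1, giving
 | | // (3 - 1) * 4 * 8 = 64.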
226 | 0 | assert(BoundAlignment <= Align(8)); |
227 | 0 | unsigned LogA = Log2(BoundAlignment); |
228 | 0 | return (3 - LogA) * Cost * NumLoads; |
229 | 0 | } |
230 | | |
231 | 0 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, |
232 | 0 | OpInfo, I); |
233 | 0 | } |
234 | | |
235 | | InstructionCost |
236 | | HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
237 | | Align Alignment, unsigned AddressSpace, |
238 | 0 | TTI::TargetCostKind CostKind) { |
239 | 0 | return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
240 | 0 | CostKind); |
241 | 0 | } |
242 | | |
243 | | InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, |
244 | | ArrayRef<int> Mask, |
245 | | TTI::TargetCostKind CostKind, |
246 | | int Index, Type *SubTp, |
247 | 0 | ArrayRef<const Value *> Args) { |
248 | 0 | return 1; |
249 | 0 | } |
250 | | |
251 | | InstructionCost HexagonTTIImpl::getGatherScatterOpCost( |
252 | | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
253 | 0 | Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) { |
254 | 0 | return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, |
255 | 0 | Alignment, CostKind, I); |
256 | 0 | } |
257 | | |
258 | | InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost( |
259 | | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
260 | | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
261 | 0 | bool UseMaskForCond, bool UseMaskForGaps) { |
262 | 0 | if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps) |
263 | 0 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
264 | 0 | Alignment, AddressSpace, |
265 | 0 | CostKind, |
266 | 0 | UseMaskForCond, UseMaskForGaps); |
267 | 0 | return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, |
268 | 0 | CostKind); |
269 | 0 | } |
270 | | |
271 | | InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
272 | | Type *CondTy, |
273 | | CmpInst::Predicate VecPred, |
274 | | TTI::TargetCostKind CostKind, |
275 | 6 | const Instruction *I) { |
276 | 6 | if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) { |
277 | 0 | if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy()) |
278 | 0 | return InstructionCost::getMax(); |
279 | 0 | std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy); |
280 | 0 | if (Opcode == Instruction::FCmp) |
281 | 0 | return LT.first + FloatFactor * getTypeNumElements(ValTy); |
282 | 0 | } |
283 | 6 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); |
284 | 6 | } |
285 | | |
286 | | InstructionCost HexagonTTIImpl::getArithmeticInstrCost( |
287 | | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
288 | | TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, |
289 | | ArrayRef<const Value *> Args, |
290 | 26 | const Instruction *CxtI) { |
291 | | // TODO: Handle more cost kinds. |
292 | 26 | if (CostKind != TTI::TCK_RecipThroughput) |
293 | 26 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, |
294 | 26 | Op2Info, Args, CxtI); |
295 | | |
296 | 0 | if (Ty->isVectorTy()) { |
297 | 0 | if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy()) |
298 | 0 | return InstructionCost::getMax(); |
299 | 0 | std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty); |
300 | 0 | if (LT.second.isFloatingPoint()) |
301 | 0 | return LT.first + FloatFactor * getTypeNumElements(Ty); |
302 | 0 | } |
303 | 0 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, |
304 | 0 | Args, CxtI); |
305 | 0 | } |
306 | | |
307 | | InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy, |
308 | | Type *SrcTy, |
309 | | TTI::CastContextHint CCH, |
310 | | TTI::TargetCostKind CostKind, |
311 | 0 | const Instruction *I) { |
312 | 0 | auto isNonHVXFP = [this] (Type *Ty) { |
313 | 0 | return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy(); |
314 | 0 | }; |
315 | 0 | if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy)) |
316 | 0 | return InstructionCost::getMax(); |
317 | | |
318 | 0 | if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) { |
319 | 0 | unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0; |
320 | 0 | unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0; |
321 | |
322 | 0 | std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy); |
323 | 0 | std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy); |
324 | 0 | InstructionCost Cost = |
325 | 0 | std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); |
326 | | // TODO: Allow non-throughput costs that aren't binary. |
327 | 0 | if (CostKind != TTI::TCK_RecipThroughput) |
328 | 0 | return Cost == 0 ? 0 : 1; |
329 | 0 | return Cost; |
330 | 0 | } |
331 | 0 | return 1; |
332 | 0 | } |
333 | | |
334 | | InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, |
335 | | TTI::TargetCostKind CostKind, |
336 | | unsigned Index, Value *Op0, |
337 | 0 | Value *Op1) { |
338 | 0 | Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType() |
339 | 0 | : Val; |
340 | 0 | if (Opcode == Instruction::InsertElement) { |
341 | | // Need two rotations for non-zero index. |
342 | 0 | unsigned Cost = (Index != 0) ? 2 : 0; |
343 | 0 | if (ElemTy->isIntegerTy(32)) |
344 | 0 | return Cost; |
345 | | // If it's not a 32-bit value, there will need to be an extract. |
346 | 0 | return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, CostKind, |
347 | 0 | Index, Op0, Op1); |
348 | 0 | } |
349 | | |
350 | 0 | if (Opcode == Instruction::ExtractElement) |
351 | 0 | return 2; |
352 | | |
353 | 0 | return 1; |
354 | 0 | } |
355 | | |
356 | 0 | bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/) { |
357 | | // This function is called from scalarize-masked-mem-intrin, which runs |
358 | | // in pre-isel. Use ST directly instead of calling isHVXVectorType. |
359 | 0 | return HexagonMaskedVMem && ST.isTypeForHVX(DataType); |
360 | 0 | } |
361 | | |
362 | 0 | bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/) { |
363 | | // This function is called from scalarize-masked-mem-intrin, which runs |
364 | | // in pre-isel. Use ST directly instead of calling isHVXVectorType. |
365 | 0 | return HexagonMaskedVMem && ST.isTypeForHVX(DataType); |
366 | 0 | } |
367 | | |
368 | | /// --- Vector TTI end --- |
369 | | |
370 | 0 | unsigned HexagonTTIImpl::getPrefetchDistance() const { |
371 | 0 | return ST.getL1PrefetchDistance(); |
372 | 0 | } |
373 | | |
374 | 0 | unsigned HexagonTTIImpl::getCacheLineSize() const { |
375 | 0 | return ST.getL1CacheLineSize(); |
376 | 0 | } |
377 | | |
378 | | InstructionCost |
379 | | HexagonTTIImpl::getInstructionCost(const User *U, |
380 | | ArrayRef<const Value *> Operands, |
381 | 52 | TTI::TargetCostKind CostKind) { |
382 | 52 | auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool { |
383 | 0 | if (!CI->isIntegerCast()) |
384 | 0 | return false; |
385 | | // Only extensions from an integer type shorter than 32-bit to i32 |
386 | | // can be folded into the load. |
387 | 0 | const DataLayout &DL = getDataLayout(); |
388 | 0 | unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy()); |
389 | 0 | unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy()); |
390 | 0 | if (DBW != 32 || SBW >= DBW) |
391 | 0 | return false; |
392 | | |
393 | 0 | const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0)); |
394 | | // Technically, this code could allow multiple uses of the load, and |
395 | | // check if all the uses are the same extension operation, but this |
396 | | // should be sufficient for most cases. |
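 | | // The folded pattern is e.g. %w = load i16, ptr %p followed by a single
 | | // %x = sext i16 %w to i32 (or zext), where the load has no other users.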
397 | 0 | return LI && LI->hasOneUse(); |
398 | 0 | }; |
399 | | |
400 | 52 | if (const CastInst *CI = dyn_cast<const CastInst>(U)) |
401 | 0 | if (isCastFoldedIntoLoad(CI)) |
402 | 0 | return TargetTransformInfo::TCC_Free; |
403 | 52 | return BaseT::getInstructionCost(U, Operands, CostKind); |
404 | 52 | } |
405 | | |
406 | 0 | bool HexagonTTIImpl::shouldBuildLookupTables() const { |
407 | 0 | return EmitLookupTables; |
408 | 0 | } |