Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
 Line|  Count|Source
    1|       |//===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
    2|       |//
    3|       |// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    4|       |// See https://llvm.org/LICENSE.txt for license information.
    5|       |// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    6|       |//
    7|       |/// \file
    8|       |/// This file implements a TargetTransformInfo analysis pass specific to the
    9|       |/// Hexagon target machine. It uses the target's detailed information to provide
   10|       |/// more precise answers to certain TTI queries, while letting the target
   11|       |/// independent and default TTI implementations handle the rest.
   12|       |///
   13|       |//===----------------------------------------------------------------------===//
   14|       |
   15|       |#include "HexagonTargetTransformInfo.h"
   16|       |#include "HexagonSubtarget.h"
   17|       |#include "llvm/Analysis/TargetTransformInfo.h"
   18|       |#include "llvm/CodeGen/ValueTypes.h"
   19|       |#include "llvm/IR/InstrTypes.h"
   20|       |#include "llvm/IR/Instructions.h"
   21|       |#include "llvm/IR/User.h"
   22|       |#include "llvm/Support/Casting.h"
   23|       |#include "llvm/Support/CommandLine.h"
   24|       |#include "llvm/Transforms/Utils/LoopPeel.h"
   25|       |#include "llvm/Transforms/Utils/UnrollLoop.h"
   26|       |
   27|       |using namespace llvm;
   28|       |
   29|       |#define DEBUG_TYPE "hexagontti"
   30|       |
   31|       |static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
   32|       |    cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
   33|       |
   34|       |static cl::opt<bool> EnableV68FloatAutoHVX(
   35|       |    "force-hvx-float", cl::Hidden,
   36|       |    cl::desc("Enable auto-vectorization of floating point types on v68."));
   37|       |
   38|       |static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
   39|       |    cl::init(true), cl::Hidden,
   40|       |    cl::desc("Control lookup table emission on Hexagon target"));
   41|       |
   42|       |static cl::opt<bool> HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true),
   43|       |    cl::Hidden, cl::desc("Enable masked loads/stores for HVX"));
   44|       |
   45|       |// Constant "cost factor" to make floating point operations more expensive
   46|       |// in terms of vectorization cost. This isn't the best way, but it should
   47|       |// do. Ultimately, the cost should use cycles.
   48|       |static const unsigned FloatFactor = 4;
   49|       |
   50|      0|bool HexagonTTIImpl::useHVX() const {
   51|      0|  return ST.useHVXOps() && HexagonAutoHVX;
   52|      0|}
   53|       |
   54|      0|bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {
   55|      0|  auto *VecTy = dyn_cast<VectorType>(Ty);
   56|      0|  if (!VecTy)
   57|      0|    return false;
   58|      0|  if (!ST.isTypeForHVX(VecTy))
   59|      0|    return false;
   60|      0|  if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
   61|      0|    return true;
   62|      0|  return ST.useHVXV68Ops() && EnableV68FloatAutoHVX;
   63|      0|}
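
Editor's note: the guard above admits integer HVX vectors whenever the
subtarget says the type fits a vector register, but floating-point element
types pass only on v69+, or on v68 with -force-hvx-float. A minimal
standalone sketch of that decision, with plain booleans standing in for the
subtarget queries (hvxVectorOk is an illustrative name, not LLVM API):

    // Sketch of the isHVXVectorType() gating logic.
    #include <cstdio>

    static bool hvxVectorOk(bool IsTypeForHVX, bool IsFPElement, bool HasV69,
                            bool HasV68, bool ForceHvxFloat) {
      if (!IsTypeForHVX)
        return false;                 // wrong shape or size for HVX
      if (HasV69 || !IsFPElement)     // v69 handles FP; integers always pass
        return true;
      return HasV68 && ForceHvxFloat; // v68 FP only behind the flag
    }

    int main() {
      std::printf("%d\n", hvxVectorOk(true, true, false, true, false)); // 0
      std::printf("%d\n", hvxVectorOk(true, true, false, true, true));  // 1
    }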
   64|       |
   65|      0|unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
   66|      0|  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
   67|      0|    return VTy->getNumElements();
   68|      0|  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
   69|      0|         "Expecting scalar type");
   70|      0|  return 1;
   71|      0|}
   72|       |
   73|       |TargetTransformInfo::PopcntSupportKind
   74|      0|HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
   75|       |  // Return fast hardware support as every input < 64 bits will be promoted
   76|       |  // to 64 bits.
   77|      0|  return TargetTransformInfo::PSK_FastHardware;
   78|      0|}
   79|       |
   80|       |// The Hexagon target can unroll loops with run-time trip counts.
   81|       |void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   82|       |                                             TTI::UnrollingPreferences &UP,
   83|      0|                                             OptimizationRemarkEmitter *ORE) {
   84|      0|  UP.Runtime = UP.Partial = true;
   85|      0|}
   86|       |
   87|       |void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
   88|      0|                                           TTI::PeelingPreferences &PP) {
   89|      0|  BaseT::getPeelingPreferences(L, SE, PP);
   90|       |  // Only try to peel innermost loops with small runtime trip counts.
   91|      0|  if (L && L->isInnermost() && canPeel(L) &&
   92|      0|      SE.getSmallConstantTripCount(L) == 0 &&
   93|      0|      SE.getSmallConstantMaxTripCount(L) > 0 &&
   94|      0|      SE.getSmallConstantMaxTripCount(L) <= 5) {
   95|      0|    PP.PeelCount = 2;
   96|      0|  }
   97|      0|}
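
Editor's note: the peeling heuristic above fires only when ScalarEvolution
cannot compute an exact trip count (getSmallConstantTripCount() returns 0)
but can bound it by at most 5 iterations. A small sketch of that predicate
(peelCount is an illustrative helper, not LLVM API):

    // Peel 2 iterations only for loops with an unknown but provably
    // small (<= 5) trip count.
    #include <cstdio>

    static unsigned peelCount(unsigned ExactTripCount, unsigned MaxTripCount) {
      if (ExactTripCount == 0 && MaxTripCount > 0 && MaxTripCount <= 5)
        return 2;
      return 0; // otherwise defer to the default peeling preferences
    }

    int main() {
      std::printf("%u\n", peelCount(0, 4)); // 2: unknown count, bounded by 4
      std::printf("%u\n", peelCount(8, 8)); // 0: exact count already known
    }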
   98|       |
   99|       |TTI::AddressingModeKind
  100|       |HexagonTTIImpl::getPreferredAddressingMode(const Loop *L,
  101|  1.99k|                                           ScalarEvolution *SE) const {
  102|  1.99k|  return TTI::AMK_PostIndexed;
  103|  1.99k|}
  104|       |
  105|       |/// --- Vector TTI begin ---
  106|       |
  107|      0|unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
  108|      0|  if (Vector)
  109|      0|    return useHVX() ? 32 : 0;
  110|      0|  return 32;
  111|      0|}
  112|       |
  113|      0|unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
  114|      0|  return useHVX() ? 2 : 1;
  115|      0|}
  116|       |
  117|       |TypeSize
  118|      0|HexagonTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  119|      0|  switch (K) {
  120|      0|  case TargetTransformInfo::RGK_Scalar:
  121|      0|    return TypeSize::getFixed(32);
  122|      0|  case TargetTransformInfo::RGK_FixedWidthVector:
  123|      0|    return TypeSize::getFixed(getMinVectorRegisterBitWidth());
  124|      0|  case TargetTransformInfo::RGK_ScalableVector:
  125|      0|    return TypeSize::getScalable(0);
  126|      0|  }
  127|       |
  128|      0|  llvm_unreachable("Unsupported register kind");
  129|      0|}
  130|       |
  131|      0|unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
  132|      0|  return useHVX() ? ST.getVectorLength()*8 : 32;
  133|      0|}
  134|       |
  135|       |ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth,
  136|      0|                                          bool IsScalable) const {
  137|      0|  assert(!IsScalable && "Scalable VFs are not supported for Hexagon");
  138|      0|  return ElementCount::getFixed((8 * ST.getVectorLength()) / ElemWidth);
  139|      0|}
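
Editor's note: getMinimumVF() returns the element count that fills one HVX
vector register. A worked example of (8 * VectorLength) / ElemWidth,
assuming ST.getVectorLength() reports 128 bytes (the 128B HVX mode):

    // Minimum vectorization factors for a 128-byte (1024-bit) HVX register.
    #include <cstdio>

    int main() {
      const unsigned VectorLengthBytes = 128; // assumed subtarget value
      const unsigned Widths[] = {8, 16, 32};
      for (unsigned ElemWidth : Widths)
        std::printf("ElemWidth %2u -> VF %u\n", ElemWidth,
                    (8 * VectorLengthBytes) / ElemWidth); // 128, 64, 32
    }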
  140|       |
  141|       |InstructionCost HexagonTTIImpl::getScalarizationOverhead(
  142|       |    VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
  143|      0|    TTI::TargetCostKind CostKind) {
  144|      0|  return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
  145|      0|                                         CostKind);
  146|      0|}
  147|       |
  148|       |InstructionCost
  149|       |HexagonTTIImpl::getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
  150|       |                                                 ArrayRef<Type *> Tys,
  151|      0|                                                 TTI::TargetCostKind CostKind) {
  152|      0|  return BaseT::getOperandsScalarizationOverhead(Args, Tys, CostKind);
  153|      0|}
  154|       |
  155|       |InstructionCost HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
  156|       |                                                 ArrayRef<Type *> Tys,
  157|      0|                                                 TTI::TargetCostKind CostKind) {
  158|      0|  return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
  159|      0|}
  160|       |
  161|       |InstructionCost
  162|       |HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  163|      0|                                      TTI::TargetCostKind CostKind) {
  164|      0|  if (ICA.getID() == Intrinsic::bswap) {
  165|      0|    std::pair<InstructionCost, MVT> LT =
  166|      0|        getTypeLegalizationCost(ICA.getReturnType());
  167|      0|    return LT.first + 2;
  168|      0|  }
  169|      0|  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
  170|      0|}
  171|       |
  172|       |InstructionCost HexagonTTIImpl::getAddressComputationCost(Type *Tp,
  173|       |                                                          ScalarEvolution *SE,
  174|      0|                                                          const SCEV *S) {
  175|      0|  return 0;
  176|      0|}
  177|       |
  178|       |InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
  179|       |                                                MaybeAlign Alignment,
  180|       |                                                unsigned AddressSpace,
  181|       |                                                TTI::TargetCostKind CostKind,
  182|       |                                                TTI::OperandValueInfo OpInfo,
  183|     35|                                                const Instruction *I) {
  184|     35|  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
  185|       |  // TODO: Handle other cost kinds.
  186|     35|  if (CostKind != TTI::TCK_RecipThroughput)
  187|     35|    return 1;
  188|       |
  189|      0|  if (Opcode == Instruction::Store)
  190|      0|    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  191|      0|                                  CostKind, OpInfo, I);
  192|       |
  193|      0|  if (Src->isVectorTy()) {
  194|      0|    VectorType *VecTy = cast<VectorType>(Src);
  195|      0|    unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedValue();
  196|      0|    if (isHVXVectorType(VecTy)) {
  197|      0|      unsigned RegWidth =
  198|      0|          getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
  199|      0|              .getFixedValue();
  200|      0|      assert(RegWidth && "Non-zero vector register width expected");
  201|       |      // Cost of HVX loads.
  202|      0|      if (VecWidth % RegWidth == 0)
  203|      0|        return VecWidth / RegWidth;
  204|       |      // Cost of constructing HVX vector from scalar loads
  205|      0|      const Align RegAlign(RegWidth / 8);
  206|      0|      if (!Alignment || *Alignment > RegAlign)
  207|      0|        Alignment = RegAlign;
  208|      0|      assert(Alignment);
  209|      0|      unsigned AlignWidth = 8 * Alignment->value();
  210|      0|      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
  211|      0|      return 3 * NumLoads;
  212|      0|    }
  213|       |
  214|       |    // Non-HVX vectors.
  215|       |    // Add extra cost for floating point types.
  216|      0|    unsigned Cost =
  217|      0|        VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
  218|       |
  219|       |    // At this point unspecified alignment is considered as Align(1).
  220|      0|    const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
  221|      0|    unsigned AlignWidth = 8 * BoundAlignment.value();
  222|      0|    unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
  223|      0|    if (Alignment == Align(4) || Alignment == Align(8))
  224|      0|      return Cost * NumLoads;
  225|       |    // Loads of less than 32 bits will need extra inserts to compose a vector.
  226|      0|    assert(BoundAlignment <= Align(8));
  227|      0|    unsigned LogA = Log2(BoundAlignment);
  228|      0|    return (3 - LogA) * Cost * NumLoads;
  229|      0|  }
  230|       |
  231|      0|  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
  232|      0|                                OpInfo, I);
  233|      0|}
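
Editor's note: in the HVX branch above, loads covering whole vector
registers cost VecWidth / RegWidth, while anything else is priced at 3 per
partial load at the best usable alignment. A standalone sketch of that
arithmetic, assuming a 128-byte (1024-bit) HVX register (hvxLoadCost is an
illustrative name, not LLVM API):

    // HVX load cost: whole registers, or 3 units per aligned piece.
    #include <algorithm>
    #include <cstdio>

    static unsigned hvxLoadCost(unsigned VecWidthBits, unsigned AlignBytes) {
      const unsigned RegWidth = 1024; // assumed vector register width
      if (VecWidthBits % RegWidth == 0)
        return VecWidthBits / RegWidth;        // cost of full HVX loads
      // Otherwise assemble the vector from narrower aligned loads.
      unsigned AlignWidth = 8 * std::min(AlignBytes, RegWidth / 8);
      unsigned NumLoads = (VecWidthBits + AlignWidth - 1) / AlignWidth;
      return 3 * NumLoads;
    }

    int main() {
      std::printf("%u\n", hvxLoadCost(2048, 128)); // 2: two full registers
      std::printf("%u\n", hvxLoadCost(512, 4));    // 48: 16 pieces of 32 bits
    }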
  234|       |
  235|       |InstructionCost
  236|       |HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
  237|       |                                      Align Alignment, unsigned AddressSpace,
  238|      0|                                      TTI::TargetCostKind CostKind) {
  239|      0|  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  240|      0|                                      CostKind);
  241|      0|}
  242|       |
  243|       |InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
  244|       |                                               ArrayRef<int> Mask,
  245|       |                                               TTI::TargetCostKind CostKind,
  246|       |                                               int Index, Type *SubTp,
  247|      0|                                               ArrayRef<const Value *> Args) {
  248|      0|  return 1;
  249|      0|}
  250|       |
  251|       |InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
  252|       |    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  253|      0|    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  254|      0|  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
  255|      0|                                       Alignment, CostKind, I);
  256|      0|}
  257|       |
  258|       |InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
  259|       |    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
  260|       |    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
  261|      0|    bool UseMaskForCond, bool UseMaskForGaps) {
  262|      0|  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
  263|      0|    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
  264|      0|                                             Alignment, AddressSpace,
  265|      0|                                             CostKind,
  266|      0|                                             UseMaskForCond, UseMaskForGaps);
  267|      0|  return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
  268|      0|                         CostKind);
  269|      0|}
  270|       |
  271|       |InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
  272|       |                                                   Type *CondTy,
  273|       |                                                   CmpInst::Predicate VecPred,
  274|       |                                                   TTI::TargetCostKind CostKind,
  275|      6|                                                   const Instruction *I) {
  276|      6|  if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
  277|      0|    if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy())
  278|      0|      return InstructionCost::getMax();
  279|      0|    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
  280|      0|    if (Opcode == Instruction::FCmp)
  281|      0|      return LT.first + FloatFactor * getTypeNumElements(ValTy);
  282|      0|  }
  283|      6|  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  284|      6|}
  285|       |
  286|       |InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
  287|       |    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
  288|       |    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
  289|       |    ArrayRef<const Value *> Args,
  290|     26|    const Instruction *CxtI) {
  291|       |  // TODO: Handle more cost kinds.
  292|     26|  if (CostKind != TTI::TCK_RecipThroughput)
  293|     26|    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
  294|     26|                                         Op2Info, Args, CxtI);
  295|       |
  296|      0|  if (Ty->isVectorTy()) {
  297|      0|    if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())
  298|      0|      return InstructionCost::getMax();
  299|      0|    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  300|      0|    if (LT.second.isFloatingPoint())
  301|      0|      return LT.first + FloatFactor * getTypeNumElements(Ty);
  302|      0|  }
  303|      0|  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
  304|      0|                                       Args, CxtI);
  305|      0|}
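
Editor's note: getCmpSelInstrCost() and getArithmeticInstrCost() shape
floating-point vector costs the same way: the type-legalization cost plus
FloatFactor per element. A worked instance, assuming a type that legalizes
in one step (LT.first == 1):

    // FP vector cost shape: LT.first + FloatFactor * NumElements.
    #include <cstdio>

    int main() {
      const unsigned FloatFactor = 4; // mirrors the constant near the top
      unsigned LTFirst = 1;           // assumed one-step legalization cost
      unsigned NumElements = 32;      // e.g. <32 x float> in 128B HVX mode
      std::printf("%u\n", LTFirst + FloatFactor * NumElements); // prints 129
    }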
  306|       |
  307|       |InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
  308|       |                                                 Type *SrcTy,
  309|       |                                                 TTI::CastContextHint CCH,
  310|       |                                                 TTI::TargetCostKind CostKind,
  311|      0|                                                 const Instruction *I) {
  312|      0|  auto isNonHVXFP = [this] (Type *Ty) {
  313|      0|    return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();
  314|      0|  };
  315|      0|  if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
  316|      0|    return InstructionCost::getMax();
  317|       |
  318|      0|  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
  319|      0|    unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
  320|      0|    unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
  321|       |
  322|      0|    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy);
  323|      0|    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy);
  324|      0|    InstructionCost Cost =
  325|      0|        std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
  326|       |    // TODO: Allow non-throughput costs that aren't binary.
  327|      0|    if (CostKind != TTI::TCK_RecipThroughput)
  328|      0|      return Cost == 0 ? 0 : 1;
  329|      0|    return Cost;
  330|      0|  }
  331|      0|  return 1;
  332|      0|}
  333|       |
  334|       |InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
  335|       |                                                   TTI::TargetCostKind CostKind,
  336|       |                                                   unsigned Index, Value *Op0,
  337|      0|                                                   Value *Op1) {
  338|      0|  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
  339|      0|                                   : Val;
  340|      0|  if (Opcode == Instruction::InsertElement) {
  341|       |    // Need two rotations for non-zero index.
  342|      0|    unsigned Cost = (Index != 0) ? 2 : 0;
  343|      0|    if (ElemTy->isIntegerTy(32))
  344|      0|      return Cost;
  345|       |    // If it's not a 32-bit value, there will need to be an extract.
  346|      0|    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, CostKind,
  347|      0|                                     Index, Op0, Op1);
  348|      0|  }
  349|       |
  350|      0|  if (Opcode == Instruction::ExtractElement)
  351|      0|    return 2;
  352|       |
  353|      0|  return 1;
  354|      0|}
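
Editor's note: the insert/extract pricing above, restated: inserts at a
non-zero index cost two rotations, i32 elements otherwise insert for free,
and any narrower element also pays the extract cost of 2. A sketch
(insertElementCost is an illustrative helper, not LLVM API):

    // InsertElement cost per the rules above.
    #include <cstdio>

    static unsigned insertElementCost(bool ElemIsI32, unsigned Index) {
      unsigned Cost = (Index != 0) ? 2 : 0; // two rotations off index 0
      if (ElemIsI32)
        return Cost;
      return Cost + 2; // non-32-bit values also need an extract (cost 2)
    }

    int main() {
      std::printf("%u\n", insertElementCost(true, 0));  // 0
      std::printf("%u\n", insertElementCost(false, 3)); // 4
    }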
  355|       |
  356|      0|bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/) {
  357|       |  // This function is called from scalarize-masked-mem-intrin, which runs
  358|       |  // in pre-isel. Use ST directly instead of calling isHVXVectorType.
  359|      0|  return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
  360|      0|}
  361|       |
  362|      0|bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/) {
  363|       |  // This function is called from scalarize-masked-mem-intrin, which runs
  364|       |  // in pre-isel. Use ST directly instead of calling isHVXVectorType.
  365|      0|  return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
  366|      0|}
  367|       |
  368|       |/// --- Vector TTI end ---
  369|       |
  370|      0|unsigned HexagonTTIImpl::getPrefetchDistance() const {
  371|      0|  return ST.getL1PrefetchDistance();
  372|      0|}
  373|       |
  374|      0|unsigned HexagonTTIImpl::getCacheLineSize() const {
  375|      0|  return ST.getL1CacheLineSize();
  376|      0|}
  377|       |
  378|       |InstructionCost
  379|       |HexagonTTIImpl::getInstructionCost(const User *U,
  380|       |                                   ArrayRef<const Value *> Operands,
  381|     52|                                   TTI::TargetCostKind CostKind) {
  382|     52|  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
  383|      0|    if (!CI->isIntegerCast())
  384|      0|      return false;
  385|       |    // Only extensions from an integer type shorter than 32-bit to i32
  386|       |    // can be folded into the load.
  387|      0|    const DataLayout &DL = getDataLayout();
  388|      0|    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
  389|      0|    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
  390|      0|    if (DBW != 32 || SBW >= DBW)
  391|      0|      return false;
  392|       |
  393|      0|    const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
  394|       |    // Technically, this code could allow multiple uses of the load, and
  395|       |    // check if all the uses are the same extension operation, but this
  396|       |    // should be sufficient for most cases.
  397|      0|    return LI && LI->hasOneUse();
  398|      0|  };
  399|       |
  400|     52|  if (const CastInst *CI = dyn_cast<const CastInst>(U))
  401|      0|    if (isCastFoldedIntoLoad(CI))
  402|      0|      return TargetTransformInfo::TCC_Free;
  403|     52|  return BaseT::getInstructionCost(U, Operands, CostKind);
  404|     52|}
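
Editor's note: isCastFoldedIntoLoad() treats an integer extension to i32
from a narrower, single-use load as free, the pattern being roughly
"%w = load i16, ...; %x = sext i16 %w to i32". A sketch of the predicate
(foldsIntoLoad is an illustrative helper, not LLVM API):

    // Extension-folds-into-load test: sub-32-bit integer -> i32, and the
    // loaded value must have exactly one use (the extension itself).
    #include <cstdio>

    static bool foldsIntoLoad(unsigned SrcBits, unsigned DstBits,
                              bool OperandIsSingleUseLoad) {
      if (DstBits != 32 || SrcBits >= DstBits)
        return false;
      return OperandIsSingleUseLoad;
    }

    int main() {
      std::printf("%d\n", foldsIntoLoad(16, 32, true));  // 1 -> TCC_Free
      std::printf("%d\n", foldsIntoLoad(32, 64, true));  // 0
    }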
  405|       |
  406|      0|bool HexagonTTIImpl::shouldBuildLookupTables() const {
  407|      0|  return EmitLookupTables;
  408|      0|}