/src/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
Line | Count | Source
1 | | //===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | /// \file |
8 | | /// This file implements a TargetTransformInfo analysis pass specific to the |
9 | | /// Hexagon target machine. It uses the target's detailed information to provide |
10 | | /// more precise answers to certain TTI queries, while letting the target |
11 | | /// independent and default TTI implementations handle the rest. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "HexagonTargetTransformInfo.h" |
16 | | #include "HexagonSubtarget.h" |
17 | | #include "llvm/Analysis/TargetTransformInfo.h" |
18 | | #include "llvm/CodeGen/ValueTypes.h" |
19 | | #include "llvm/IR/InstrTypes.h" |
20 | | #include "llvm/IR/Instructions.h" |
21 | | #include "llvm/IR/User.h" |
22 | | #include "llvm/Support/Casting.h" |
23 | | #include "llvm/Support/CommandLine.h" |
24 | | #include "llvm/Transforms/Utils/LoopPeel.h" |
25 | | #include "llvm/Transforms/Utils/UnrollLoop.h" |
26 | | |
27 | | using namespace llvm; |
28 | | |
29 | | #define DEBUG_TYPE "hexagontti" |
30 | | |
31 | | static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false), |
32 | | cl::Hidden, cl::desc("Enable loop vectorizer for HVX")); |
33 | | |
34 | | static cl::opt<bool> EnableV68FloatAutoHVX( |
35 | | "force-hvx-float", cl::Hidden, |
36 | | cl::desc("Enable auto-vectorization of floating point types on v68."));
37 | | |
38 | | static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables", |
39 | | cl::init(true), cl::Hidden, |
40 | | cl::desc("Control lookup table emission on Hexagon target")); |
41 | | |
42 | | static cl::opt<bool> HexagonMaskedVMem("hexagon-masked-vmem", cl::init(true), |
43 | | cl::Hidden, cl::desc("Enable masked loads/stores for HVX")); |
44 | | |
45 | | // Constant "cost factor" to make floating point operations more expensive |
46 | | // in terms of vectorization cost. This isn't the best way, but it should |
47 | | // do. Ultimately, the cost should use cycles. |
48 | | static const unsigned FloatFactor = 4; |
49 | | |
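 | | // HVX auto-vectorization is opt-in: this returns true only when the
 | | // subtarget has HVX ops and -hexagon-autohvx is passed (the flag above
 | | // defaults to off).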
50 | 0 | bool HexagonTTIImpl::useHVX() const { |
51 | 0 | return ST.useHVXOps() && HexagonAutoHVX; |
52 | 0 | } |
53 | | |
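 | | // A type counts as an HVX vector type if the subtarget accepts it for HVX;
 | | // floating-point element types are additionally gated on v69+, or on v68
 | | // together with the -force-hvx-float flag.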
54 | 0 | bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const { |
55 | 0 | auto *VecTy = dyn_cast<VectorType>(Ty); |
56 | 0 | if (!VecTy) |
57 | 0 | return false; |
58 | 0 | if (!ST.isTypeForHVX(VecTy)) |
59 | 0 | return false; |
60 | 0 | if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy()) |
61 | 0 | return true; |
62 | 0 | return ST.useHVXV68Ops() && EnableV68FloatAutoHVX; |
63 | 0 | } |
64 | | |
65 | 0 | unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const { |
66 | 0 | if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) |
67 | 0 | return VTy->getNumElements(); |
68 | 0 | assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) && |
69 | 0 | "Expecting scalar type"); |
70 | 0 | return 1; |
71 | 0 | } |
72 | | |
73 | | TargetTransformInfo::PopcntSupportKind |
74 | 0 | HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { |
75 | | // Return fast hardware support as every input < 64 bits will be promoted |
76 | | // to 64 bits. |
77 | 0 | return TargetTransformInfo::PSK_FastHardware; |
78 | 0 | } |
79 | | |
80 | | // The Hexagon target can unroll loops with run-time trip counts. |
81 | | void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
82 | | TTI::UnrollingPreferences &UP, |
83 | 0 | OptimizationRemarkEmitter *ORE) { |
84 | 0 | UP.Runtime = UP.Partial = true; |
85 | 0 | } |
86 | | |
87 | | void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
88 | 0 | TTI::PeelingPreferences &PP) { |
89 | 0 | BaseT::getPeelingPreferences(L, SE, PP); |
90 | | // Only try to peel innermost loops with small runtime trip counts. |
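 | | // That is: no constant trip count is known (== 0), but the maximum trip
 | | // count is known and does not exceed 5.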
91 | 0 | if (L && L->isInnermost() && canPeel(L) && |
92 | 0 | SE.getSmallConstantTripCount(L) == 0 && |
93 | 0 | SE.getSmallConstantMaxTripCount(L) > 0 && |
94 | 0 | SE.getSmallConstantMaxTripCount(L) <= 5) { |
95 | 0 | PP.PeelCount = 2; |
96 | 0 | } |
97 | 0 | } |
98 | | |
99 | | TTI::AddressingModeKind |
100 | | HexagonTTIImpl::getPreferredAddressingMode(const Loop *L, |
101 | 1.99k | ScalarEvolution *SE) const { |
102 | 1.99k | return TTI::AMK_PostIndexed; |
103 | 1.99k | } |
104 | | |
105 | | /// --- Vector TTI begin --- |
106 | | |
107 | 0 | unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const { |
108 | 0 | if (Vector) |
109 | 0 | return useHVX() ? 32 : 0; |
110 | 0 | return 32; |
111 | 0 | } |
112 | | |
113 | 0 | unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) { |
114 | 0 | return useHVX() ? 2 : 1; |
115 | 0 | } |
116 | | |
117 | | TypeSize |
118 | 0 | HexagonTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { |
119 | 0 | switch (K) { |
120 | 0 | case TargetTransformInfo::RGK_Scalar: |
121 | 0 | return TypeSize::getFixed(32); |
122 | 0 | case TargetTransformInfo::RGK_FixedWidthVector: |
123 | 0 | return TypeSize::getFixed(getMinVectorRegisterBitWidth()); |
124 | 0 | case TargetTransformInfo::RGK_ScalableVector: |
125 | 0 | return TypeSize::getScalable(0); |
126 | 0 | } |
127 | | |
128 | 0 | llvm_unreachable("Unsupported register kind"); |
129 | 0 | } |
130 | | |
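 | | // With HVX enabled the register width follows the configured vector length
 | | // (getVectorLength() is in bytes, hence the *8); otherwise fall back to the
 | | // 32-bit scalar registers.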
131 | 0 | unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const { |
132 | 0 | return useHVX() ? ST.getVectorLength()*8 : 32; |
133 | 0 | } |
134 | | |
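 | | // Illustrative: with 128-byte HVX vectors this asks the vectorizer for at
 | | // least 1024 / ElemWidth lanes, e.g. a minimum VF of 32 for i32 elements.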
135 | | ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth, |
136 | 0 | bool IsScalable) const { |
137 | 0 | assert(!IsScalable && "Scalable VFs are not supported for Hexagon"); |
138 | 0 | return ElementCount::getFixed((8 * ST.getVectorLength()) / ElemWidth); |
139 | 0 | } |
140 | | |
141 | | InstructionCost HexagonTTIImpl::getScalarizationOverhead( |
142 | | VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, |
143 | 0 | TTI::TargetCostKind CostKind) { |
144 | 0 | return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, |
145 | 0 | CostKind); |
146 | 0 | } |
147 | | |
148 | | InstructionCost |
149 | | HexagonTTIImpl::getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
150 | | ArrayRef<Type *> Tys, |
151 | 0 | TTI::TargetCostKind CostKind) { |
152 | 0 | return BaseT::getOperandsScalarizationOverhead(Args, Tys, CostKind); |
153 | 0 | } |
154 | | |
155 | | InstructionCost HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy, |
156 | | ArrayRef<Type *> Tys, |
157 | 0 | TTI::TargetCostKind CostKind) { |
158 | 0 | return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind); |
159 | 0 | } |
160 | | |
161 | | InstructionCost |
162 | | HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
163 | 0 | TTI::TargetCostKind CostKind) { |
164 | 0 | if (ICA.getID() == Intrinsic::bswap) { |
165 | 0 | std::pair<InstructionCost, MVT> LT = |
166 | 0 | getTypeLegalizationCost(ICA.getReturnType()); |
167 | 0 | return LT.first + 2; |
168 | 0 | } |
169 | 0 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
170 | 0 | } |
171 | | |
172 | | InstructionCost HexagonTTIImpl::getAddressComputationCost(Type *Tp, |
173 | | ScalarEvolution *SE, |
174 | 0 | const SCEV *S) { |
175 | 0 | return 0; |
176 | 0 | } |
177 | | |
178 | | InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, |
179 | | MaybeAlign Alignment, |
180 | | unsigned AddressSpace, |
181 | | TTI::TargetCostKind CostKind, |
182 | | TTI::OperandValueInfo OpInfo, |
183 | 35 | const Instruction *I) { |
184 | 35 | assert(Opcode == Instruction::Load || Opcode == Instruction::Store); |
185 | | // TODO: Handle other cost kinds. |
186 | 35 | if (CostKind != TTI::TCK_RecipThroughput) |
187 | 35 | return 1; |
188 | | |
189 | 0 | if (Opcode == Instruction::Store) |
190 | 0 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
191 | 0 | CostKind, OpInfo, I); |
192 | | |
193 | 0 | if (Src->isVectorTy()) { |
194 | 0 | VectorType *VecTy = cast<VectorType>(Src); |
195 | 0 | unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedValue(); |
196 | 0 | if (isHVXVectorType(VecTy)) { |
197 | 0 | unsigned RegWidth = |
198 | 0 | getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) |
199 | 0 | .getFixedValue(); |
200 | 0 | assert(RegWidth && "Non-zero vector register width expected"); |
201 | | // Cost of HVX loads. |
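 | | // (e.g. a vector spanning two full HVX registers costs 2.)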
202 | 0 | if (VecWidth % RegWidth == 0) |
203 | 0 | return VecWidth / RegWidth; |
204 | | // Cost of constructing HVX vector from scalar loads |
205 | 0 | const Align RegAlign(RegWidth / 8); |
206 | 0 | if (!Alignment || *Alignment > RegAlign) |
207 | 0 | Alignment = RegAlign; |
208 | 0 | assert(Alignment); |
209 | 0 | unsigned AlignWidth = 8 * Alignment->value(); |
210 | 0 | unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; |
211 | 0 | return 3 * NumLoads; |
212 | 0 | } |
213 | | |
214 | | // Non-HVX vectors. |
215 | | // Add extra cost for floating point types. |
216 | 0 | unsigned Cost = |
217 | 0 | VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1; |
218 | | |
219 | | // At this point unspecified alignment is considered as Align(1). |
220 | 0 | const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8)); |
221 | 0 | unsigned AlignWidth = 8 * BoundAlignment.value(); |
222 | 0 | unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; |
223 | 0 | if (Alignment == Align(4) || Alignment == Align(8)) |
224 | 0 | return Cost * NumLoads; |
225 | | // Loads of less than 32 bits will need extra inserts to compose a vector. |
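 | | // Illustrative arithmetic (hypothetical v4f32 load with 2-byte alignment):
 | | // Cost = FloatFactor = 4, AlignWidth = 16, NumLoads = 8, LogA = 1, giving
 | | // (3 - 1) * 4 * 8 = 64.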
226 | 0 | assert(BoundAlignment <= Align(8)); |
227 | 0 | unsigned LogA = Log2(BoundAlignment); |
228 | 0 | return (3 - LogA) * Cost * NumLoads; |
229 | 0 | } |
230 | | |
231 | 0 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, |
232 | 0 | OpInfo, I); |
233 | 0 | } |
234 | | |
235 | | InstructionCost |
236 | | HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
237 | | Align Alignment, unsigned AddressSpace, |
238 | 0 | TTI::TargetCostKind CostKind) { |
239 | 0 | return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
240 | 0 | CostKind); |
241 | 0 | } |
242 | | |
243 | | InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, |
244 | | ArrayRef<int> Mask, |
245 | | TTI::TargetCostKind CostKind, |
246 | | int Index, Type *SubTp, |
247 | 0 | ArrayRef<const Value *> Args) { |
248 | 0 | return 1; |
249 | 0 | } |
250 | | |
251 | | InstructionCost HexagonTTIImpl::getGatherScatterOpCost( |
252 | | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
253 | 0 | Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) { |
254 | 0 | return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, |
255 | 0 | Alignment, CostKind, I); |
256 | 0 | } |
257 | | |
258 | | InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost( |
259 | | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
260 | | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
261 | 0 | bool UseMaskForCond, bool UseMaskForGaps) { |
262 | 0 | if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps) |
263 | 0 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
264 | 0 | Alignment, AddressSpace, |
265 | 0 | CostKind, |
266 | 0 | UseMaskForCond, UseMaskForGaps); |
267 | 0 | return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, |
268 | 0 | CostKind); |
269 | 0 | } |
270 | | |
271 | | InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
272 | | Type *CondTy, |
273 | | CmpInst::Predicate VecPred, |
274 | | TTI::TargetCostKind CostKind, |
275 | 6 | const Instruction *I) { |
276 | 6 | if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) { |
277 | 0 | if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy()) |
278 | 0 | return InstructionCost::getMax(); |
279 | 0 | std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy); |
280 | 0 | if (Opcode == Instruction::FCmp) |
281 | 0 | return LT.first + FloatFactor * getTypeNumElements(ValTy); |
282 | 0 | } |
283 | 6 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); |
284 | 6 | } |
285 | | |
286 | | InstructionCost HexagonTTIImpl::getArithmeticInstrCost( |
287 | | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
288 | | TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, |
289 | | ArrayRef<const Value *> Args, |
290 | 26 | const Instruction *CxtI) { |
291 | | // TODO: Handle more cost kinds. |
292 | 26 | if (CostKind != TTI::TCK_RecipThroughput) |
293 | 26 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, |
294 | 26 | Op2Info, Args, CxtI); |
295 | | |
296 | 0 | if (Ty->isVectorTy()) { |
297 | 0 | if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy()) |
298 | 0 | return InstructionCost::getMax(); |
299 | 0 | std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty); |
300 | 0 | if (LT.second.isFloatingPoint()) |
301 | 0 | return LT.first + FloatFactor * getTypeNumElements(Ty); |
302 | 0 | } |
303 | 0 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, |
304 | 0 | Args, CxtI); |
305 | 0 | } |
306 | | |
307 | | InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy, |
308 | | Type *SrcTy, |
309 | | TTI::CastContextHint CCH, |
310 | | TTI::TargetCostKind CostKind, |
311 | 0 | const Instruction *I) { |
312 | 0 | auto isNonHVXFP = [this] (Type *Ty) { |
313 | 0 | return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy(); |
314 | 0 | }; |
315 | 0 | if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy)) |
316 | 0 | return InstructionCost::getMax(); |
317 | | |
318 | 0 | if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) { |
319 | 0 | unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0; |
320 | 0 | unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0; |
321 | |
322 | 0 | std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy); |
323 | 0 | std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy); |
324 | 0 | InstructionCost Cost = |
325 | 0 | std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); |
326 | | // TODO: Allow non-throughput costs that aren't binary. |
327 | 0 | if (CostKind != TTI::TCK_RecipThroughput) |
328 | 0 | return Cost == 0 ? 0 : 1; |
329 | 0 | return Cost; |
330 | 0 | } |
331 | 0 | return 1; |
332 | 0 | } |
333 | | |
334 | | InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, |
335 | | TTI::TargetCostKind CostKind, |
336 | | unsigned Index, Value *Op0, |
337 | 0 | Value *Op1) { |
338 | 0 | Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType() |
339 | 0 | : Val; |
340 | 0 | if (Opcode == Instruction::InsertElement) { |
341 | | // Need two rotations for non-zero index. |
342 | 0 | unsigned Cost = (Index != 0) ? 2 : 0; |
343 | 0 | if (ElemTy->isIntegerTy(32)) |
344 | 0 | return Cost; |
345 | | // If it's not a 32-bit value, there will need to be an extract. |
346 | 0 | return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, CostKind, |
347 | 0 | Index, Op0, Op1); |
348 | 0 | } |
349 | | |
350 | 0 | if (Opcode == Instruction::ExtractElement) |
351 | 0 | return 2; |
352 | | |
353 | 0 | return 1; |
354 | 0 | } |
355 | | |
356 | 0 | bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/) { |
357 | | // This function is called from scalarize-masked-mem-intrin, which runs |
358 | | // in pre-isel. Use ST directly instead of calling isHVXVectorType. |
359 | 0 | return HexagonMaskedVMem && ST.isTypeForHVX(DataType); |
360 | 0 | } |
361 | | |
362 | 0 | bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/) { |
363 | | // This function is called from scalarize-masked-mem-intrin, which runs |
364 | | // in pre-isel. Use ST directly instead of calling isHVXVectorType. |
365 | 0 | return HexagonMaskedVMem && ST.isTypeForHVX(DataType); |
366 | 0 | } |
367 | | |
368 | | /// --- Vector TTI end --- |
369 | | |
370 | 0 | unsigned HexagonTTIImpl::getPrefetchDistance() const { |
371 | 0 | return ST.getL1PrefetchDistance(); |
372 | 0 | } |
373 | | |
374 | 0 | unsigned HexagonTTIImpl::getCacheLineSize() const { |
375 | 0 | return ST.getL1CacheLineSize(); |
376 | 0 | } |
377 | | |
378 | | InstructionCost |
379 | | HexagonTTIImpl::getInstructionCost(const User *U, |
380 | | ArrayRef<const Value *> Operands, |
381 | 52 | TTI::TargetCostKind CostKind) { |
382 | 52 | auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool { |
383 | 0 | if (!CI->isIntegerCast()) |
384 | 0 | return false; |
385 | | // Only extensions from an integer type shorter than 32-bit to i32 |
386 | | // can be folded into the load. |
387 | 0 | const DataLayout &DL = getDataLayout(); |
388 | 0 | unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy()); |
389 | 0 | unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy()); |
390 | 0 | if (DBW != 32 || SBW >= DBW) |
391 | 0 | return false; |
392 | | |
393 | 0 | const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0)); |
394 | | // Technically, this code could allow multiple uses of the load, and |
395 | | // check if all the uses are the same extension operation, but this |
396 | | // should be sufficient for most cases. |
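 | | // The folded pattern is e.g. %w = load i16, ptr %p followed by a single
 | | // %x = sext i16 %w to i32 (or zext), where the load has no other users.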
397 | 0 | return LI && LI->hasOneUse(); |
398 | 0 | }; |
399 | | |
400 | 52 | if (const CastInst *CI = dyn_cast<const CastInst>(U)) |
401 | 0 | if (isCastFoldedIntoLoad(CI)) |
402 | 0 | return TargetTransformInfo::TCC_Free; |
403 | 52 | return BaseT::getInstructionCost(U, Operands, CostKind); |
404 | 52 | } |
405 | | |
406 | 0 | bool HexagonTTIImpl::shouldBuildLookupTables() const { |
407 | 0 | return EmitLookupTables; |
408 | 0 | } |