/src/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- AMDGPUMachineFunction.h -----------------------------------*- C++ -*-=// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H |
10 | | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H |
11 | | |
12 | | #include "Utils/AMDGPUBaseInfo.h" |
13 | | #include "llvm/ADT/DenseMap.h" |
14 | | #include "llvm/CodeGen/MachineFunction.h" |
15 | | #include "llvm/IR/DataLayout.h" |
16 | | #include "llvm/IR/Function.h" |
17 | | #include "llvm/IR/GlobalValue.h" |
18 | | #include "llvm/IR/GlobalVariable.h" |
19 | | |
20 | | namespace llvm { |
21 | | |
22 | | class AMDGPUSubtarget; |
23 | | |
24 | | class AMDGPUMachineFunction : public MachineFunctionInfo { |
25 | | /// A map to keep track of local memory objects and their offsets within the |
26 | | /// local memory space. |
27 | | SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects; |
28 | | |
29 | | protected: |
30 | | uint64_t ExplicitKernArgSize = 0; // Cache for this. |
31 | | Align MaxKernArgAlign; // Cache for this. |
32 | | |
33 | | /// Number of bytes in the LDS that are being used. |
34 | | uint32_t LDSSize = 0; |
35 | | uint32_t GDSSize = 0; |
36 | | |
37 | | /// Number of bytes in the LDS allocated statically. This field is only used |
38 | | /// in the instruction selector and not part of the machine function info. |
39 | | uint32_t StaticLDSSize = 0; |
40 | | uint32_t StaticGDSSize = 0; |
41 | | |
42 | | /// Align for dynamic shared memory if any. Dynamic shared memory is |
43 | | /// allocated directly after the static one, i.e., LDSSize. Need to pad |
44 | | /// LDSSize to ensure that dynamic one is aligned accordingly. |
45 | | /// The maximal alignment is updated during IR translation or lowering |
46 | | /// stages. |
47 | | Align DynLDSAlign; |
48 | | |
49 | | // Flag to check dynamic LDS usage by kernel. |
50 | | bool UsesDynamicLDS = false; |
51 | | |
52 | | // Kernels + shaders. i.e. functions called by the hardware and not called |
53 | | // by other functions. |
54 | | bool IsEntryFunction = false; |
55 | | |
56 | | // Entry points called by other functions instead of directly by the hardware. |
57 | | bool IsModuleEntryFunction = false; |
58 | | |
59 | | // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC. |
60 | | bool IsChainFunction = false; |
61 | | |
62 | | bool NoSignedZerosFPMath = false; |
63 | | |
64 | | // Function may be memory bound. |
65 | | bool MemoryBound = false; |
66 | | |
67 | | // Kernel may need limited waves per EU for better performance. |
68 | | bool WaveLimiter = false; |
69 | | |
70 | | public: |
71 | | AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); |
72 | | |
73 | 0 | uint64_t getExplicitKernArgSize() const { |
74 | 0 | return ExplicitKernArgSize; |
75 | 0 | } |
76 | | |
77 | 0 | Align getMaxKernArgAlign() const { return MaxKernArgAlign; } |
78 | | |
79 | 0 | uint32_t getLDSSize() const { |
80 | 0 | return LDSSize; |
81 | 0 | } |
82 | | |
83 | 0 | uint32_t getGDSSize() const { |
84 | 0 | return GDSSize; |
85 | 0 | } |
86 | | |
87 | 0 | bool isEntryFunction() const { |
88 | 0 | return IsEntryFunction; |
89 | 0 | } |
90 | | |
91 | 0 | bool isModuleEntryFunction() const { return IsModuleEntryFunction; } |
92 | | |
93 | 0 | bool isChainFunction() const { return IsChainFunction; } |
94 | | |
95 | | // The stack is empty upon entry to this function. |
96 | 0 | bool isBottomOfStack() const { |
97 | 0 | return isEntryFunction() || isChainFunction(); |
98 | 0 | } |
99 | | |
100 | 0 | bool hasNoSignedZerosFPMath() const { |
101 | 0 | return NoSignedZerosFPMath; |
102 | 0 | } |
103 | | |
104 | 0 | bool isMemoryBound() const { |
105 | 0 | return MemoryBound; |
106 | 0 | } |
107 | | |
108 | 0 | bool needsWaveLimiter() const { |
109 | 0 | return WaveLimiter; |
110 | 0 | } |
111 | | |
112 | 0 | unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) { |
113 | 0 | return allocateLDSGlobal(DL, GV, DynLDSAlign); |
114 | 0 | } |
115 | | |
116 | | unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV, |
117 | | Align Trailing); |
118 | | |
119 | | static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F); |
120 | | static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV); |
121 | | |
122 | 0 | Align getDynLDSAlign() const { return DynLDSAlign; } |
123 | | |
124 | | void setDynLDSAlign(const Function &F, const GlobalVariable &GV); |
125 | | |
126 | | void setUsesDynamicLDS(bool DynLDS); |
127 | | |
128 | | bool isDynamicLDSUsed() const; |
129 | | }; |
130 | | |
131 | | } |
132 | | #endif |