/src/skia/src/gpu/vk/VulkanAMDMemoryAllocator.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright 2018 Google Inc. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license that can be |
5 | | * found in the LICENSE file. |
6 | | */ |
7 | | |
8 | | #include "src/gpu/vk/VulkanAMDMemoryAllocator.h" |
9 | | |
10 | | #include "include/gpu/vk/VulkanExtensions.h" |
11 | | #include "src/core/SkTraceEvent.h" |
12 | | #include "src/gpu/vk/VulkanInterface.h" |
13 | | |
14 | | namespace skgpu { |
15 | | |
16 | | #ifndef SK_USE_VMA |
17 | | sk_sp<VulkanMemoryAllocator> VulkanAMDMemoryAllocator::Make( |
18 | | VkInstance instance, |
19 | | VkPhysicalDevice physicalDevice, |
20 | | VkDevice device, |
21 | | uint32_t physicalDeviceVersion, |
22 | | const VulkanExtensions* extensions, |
23 | | const VulkanInterface* interface, |
24 | | bool threadSafe) { |
25 | | return nullptr; |
26 | | } |
27 | | #else |
28 | | |
29 | | sk_sp<VulkanMemoryAllocator> VulkanAMDMemoryAllocator::Make( |
30 | | VkInstance instance, |
31 | | VkPhysicalDevice physicalDevice, |
32 | | VkDevice device, |
33 | | uint32_t physicalDeviceVersion, |
34 | | const VulkanExtensions* extensions, |
35 | | const VulkanInterface* interface, |
36 | 0 | bool threadSafe) { |
37 | 0 | #define SKGPU_COPY_FUNCTION(NAME) functions.vk##NAME = interface->fFunctions.f##NAME |
38 | 0 | #define SKGPU_COPY_FUNCTION_KHR(NAME) functions.vk##NAME##KHR = interface->fFunctions.f##NAME |
39 | | |
40 | 0 | VmaVulkanFunctions functions; |
41 | | // We should be setting all the required functions (at least through Vulkan 1.1), but this is |
42 | | // just extra belt and suspenders to make sure there aren't uninitialized values here. |
43 | 0 | memset(&functions, 0, sizeof(VmaVulkanFunctions)); |
44 | | |
45 | | // We don't use dynamic function loading in the allocator, so we set the getProc functions |
46 | | // to null. |
47 | 0 | functions.vkGetInstanceProcAddr = nullptr; |
48 | 0 | functions.vkGetDeviceProcAddr = nullptr; |
49 | 0 | SKGPU_COPY_FUNCTION(GetPhysicalDeviceProperties); |
50 | 0 | SKGPU_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties); |
51 | 0 | SKGPU_COPY_FUNCTION(AllocateMemory); |
52 | 0 | SKGPU_COPY_FUNCTION(FreeMemory); |
53 | 0 | SKGPU_COPY_FUNCTION(MapMemory); |
54 | 0 | SKGPU_COPY_FUNCTION(UnmapMemory); |
55 | 0 | SKGPU_COPY_FUNCTION(FlushMappedMemoryRanges); |
56 | 0 | SKGPU_COPY_FUNCTION(InvalidateMappedMemoryRanges); |
57 | 0 | SKGPU_COPY_FUNCTION(BindBufferMemory); |
58 | 0 | SKGPU_COPY_FUNCTION(BindImageMemory); |
59 | 0 | SKGPU_COPY_FUNCTION(GetBufferMemoryRequirements); |
60 | 0 | SKGPU_COPY_FUNCTION(GetImageMemoryRequirements); |
61 | 0 | SKGPU_COPY_FUNCTION(CreateBuffer); |
62 | 0 | SKGPU_COPY_FUNCTION(DestroyBuffer); |
63 | 0 | SKGPU_COPY_FUNCTION(CreateImage); |
64 | 0 | SKGPU_COPY_FUNCTION(DestroyImage); |
65 | 0 | SKGPU_COPY_FUNCTION(CmdCopyBuffer); |
66 | 0 | SKGPU_COPY_FUNCTION_KHR(GetBufferMemoryRequirements2); |
67 | 0 | SKGPU_COPY_FUNCTION_KHR(GetImageMemoryRequirements2); |
68 | 0 | SKGPU_COPY_FUNCTION_KHR(BindBufferMemory2); |
69 | 0 | SKGPU_COPY_FUNCTION_KHR(BindImageMemory2); |
70 | 0 | SKGPU_COPY_FUNCTION_KHR(GetPhysicalDeviceMemoryProperties2); |
71 | | |
72 | 0 | VmaAllocatorCreateInfo info; |
73 | 0 | info.flags = 0; |
74 | 0 | if (!threadSafe) { |
75 | 0 | info.flags |= VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; |
76 | 0 | } |
77 | 0 | if (physicalDeviceVersion >= VK_MAKE_VERSION(1, 1, 0) || |
78 | 0 | (extensions->hasExtension(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, 1) && |
79 | 0 | extensions->hasExtension(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, 1))) { |
80 | 0 | info.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; |
81 | 0 | } |
82 | | |
83 | 0 | info.physicalDevice = physicalDevice; |
84 | 0 | info.device = device; |
85 | | // 4MB was picked for the size here by looking at memory usage of Android apps and runs of DM. |
86 | | // It seems to be a good compromise between wasting unused allocated space and making too |
87 | | // many small allocations. The AMD allocator will start making blocks at 1/8 the max size and |
88 | | // build up block size as needed before capping at the max set here. |
89 | 0 | info.preferredLargeHeapBlockSize = 4*1024*1024; |
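// [Editorial note, not part of the Skia source] As a worked example of the growth
// described in the comment above, per VMA's documented heuristic new blocks start
// at 1/8 of preferredLargeHeapBlockSize and double until they reach it:
//
//   4 MiB / 8 = 512 KiB -> 1 MiB -> 2 MiB -> 4 MiB (the cap set here)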
90 | 0 | info.pAllocationCallbacks = nullptr; |
91 | 0 | info.pDeviceMemoryCallbacks = nullptr; |
92 | 0 | info.pHeapSizeLimit = nullptr; |
93 | 0 | info.pVulkanFunctions = &functions; |
94 | 0 | info.instance = instance; |
95 | | // TODO: Update our interface and headers to support Vulkan 1.3 and add the new required |
96 | | // functions for 1.3 that the allocator needs. Until then we just clamp the version to 1.1. |
97 | 0 | info.vulkanApiVersion = std::min(physicalDeviceVersion, VK_MAKE_VERSION(1, 1, 0)); |
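// [Editorial note, not part of the Skia source] The std::min above works because
// VK_MAKE_VERSION packs a version into a single uint32_t, so versions compare as
// plain integers:
//
//   VK_MAKE_VERSION(major, minor, patch) == (major << 22) | (minor << 12) | patch
//
// e.g. VK_MAKE_VERSION(1, 1, 0) == 0x401000, and any 1.2+ value compares greater.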
98 | 0 | info.pTypeExternalMemoryHandleTypes = nullptr; |
99 | | |
100 | 0 | VmaAllocator allocator; |
101 | 0 | vmaCreateAllocator(&info, &allocator); |
102 | | |
103 | 0 | return sk_sp<VulkanAMDMemoryAllocator>(new VulkanAMDMemoryAllocator(allocator)); |
104 | 0 | } |
105 | | |
106 | | VulkanAMDMemoryAllocator::VulkanAMDMemoryAllocator(VmaAllocator allocator) |
107 | 0 | : fAllocator(allocator) {} |
108 | | |
109 | 0 | VulkanAMDMemoryAllocator::~VulkanAMDMemoryAllocator() { |
110 | 0 | vmaDestroyAllocator(fAllocator); |
111 | 0 | fAllocator = VK_NULL_HANDLE; |
112 | 0 | } |
113 | | |
114 | | VkResult VulkanAMDMemoryAllocator::allocateImageMemory(VkImage image, |
115 | | uint32_t allocationPropertyFlags, |
116 | 0 | skgpu::VulkanBackendMemory* backendMemory) { |
117 | 0 | TRACE_EVENT0_ALWAYS("skia.gpu", TRACE_FUNC); |
118 | 0 | VmaAllocationCreateInfo info; |
119 | 0 | info.flags = 0; |
120 | 0 | info.usage = VMA_MEMORY_USAGE_UNKNOWN; |
121 | 0 | info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; |
122 | 0 | info.preferredFlags = 0; |
123 | 0 | info.memoryTypeBits = 0; |
124 | 0 | info.pool = VK_NULL_HANDLE; |
125 | 0 | info.pUserData = nullptr; |
126 | | |
127 | 0 | if (kDedicatedAllocation_AllocationPropertyFlag & allocationPropertyFlags) { |
128 | 0 | info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; |
129 | 0 | } |
130 | 0 | if (kLazyAllocation_AllocationPropertyFlag & allocationPropertyFlags) { |
131 | 0 | info.requiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; |
132 | 0 | } |
133 | 0 | if (kProtected_AllocationPropertyFlag & allocationPropertyFlags) { |
134 | 0 | info.requiredFlags |= VK_MEMORY_PROPERTY_PROTECTED_BIT; |
135 | 0 | } |
136 | | |
137 | 0 | VmaAllocation allocation; |
138 | 0 | VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr); |
139 | 0 | if (VK_SUCCESS == result) { |
140 | 0 | *backendMemory = (VulkanBackendMemory)allocation; |
141 | 0 | } |
142 | 0 | return result; |
143 | 0 | } |
144 | | |
145 | | VkResult VulkanAMDMemoryAllocator::allocateBufferMemory(VkBuffer buffer, |
146 | | BufferUsage usage, |
147 | | uint32_t allocationPropertyFlags, |
148 | 0 | skgpu::VulkanBackendMemory* backendMemory) { |
149 | 0 | TRACE_EVENT0("skia.gpu", TRACE_FUNC); |
150 | 0 | VmaAllocationCreateInfo info; |
151 | 0 | info.flags = 0; |
152 | 0 | info.usage = VMA_MEMORY_USAGE_UNKNOWN; |
153 | 0 | info.memoryTypeBits = 0; |
154 | 0 | info.pool = VK_NULL_HANDLE; |
155 | 0 | info.pUserData = nullptr; |
156 | | |
157 | 0 | switch (usage) { |
158 | 0 | case BufferUsage::kGpuOnly: |
159 | 0 | info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; |
160 | 0 | info.preferredFlags = 0; |
161 | 0 | break; |
162 | 0 | case BufferUsage::kCpuWritesGpuReads: |
163 | | // When doing cpu writes and gpu reads the general rule of thumb is to use coherent |
164 | | // memory. This rule, though, depends on the fact that we are not doing any cpu reads |
165 | | // and the cpu writes are sequential. For sparse writes we'd want cpu cached memory; |
166 | | // however, we don't do these types of writes in Skia. |
167 | | // |
168 | | // TODO: In the future there may be times when specific workloads could benefit from |
169 | | // memory that is both coherent and cached. Such memory typically lets the gpu read cpu |
170 | | // writes from the cache without the writes needing to be flushed through the cache. The |
171 | | // reverse is not true: gpu writes tend to invalidate the cache regardless. Also, these |
172 | | // cached gpu read accesses typically have lower bandwidth than reads of non-cached |
173 | | // memory. For now Skia doesn't really need or want this type of memory. But if we |
174 | | // ever do, we could pass in an AllocationPropertyFlag that requests the cached property. |
175 | 0 | info.requiredFlags = |
176 | 0 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; |
177 | 0 | info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; |
178 | 0 | break; |
179 | 0 | case BufferUsage::kTransfersFromCpuToGpu: |
180 | 0 | info.requiredFlags = |
181 | 0 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; |
182 | 0 | info.preferredFlags = 0; |
183 | 0 | break; |
184 | 0 | case BufferUsage::kTransfersFromGpuToCpu: |
185 | 0 | info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; |
186 | 0 | info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT; |
187 | 0 | break; |
188 | 0 | } |
189 | | |
190 | 0 | if (kDedicatedAllocation_AllocationPropertyFlag & allocationPropertyFlags) { |
191 | 0 | info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; |
192 | 0 | } |
193 | 0 | if ((kLazyAllocation_AllocationPropertyFlag & allocationPropertyFlags) && |
194 | 0 | BufferUsage::kGpuOnly == usage) { |
195 | 0 | info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; |
196 | 0 | } |
197 | | |
198 | 0 | if (kPersistentlyMapped_AllocationPropertyFlag & allocationPropertyFlags) { |
199 | 0 | SkASSERT(BufferUsage::kGpuOnly != usage); |
200 | 0 | info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; |
201 | 0 | } |
202 | | |
203 | 0 | VmaAllocation allocation; |
204 | 0 | VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr); |
205 | 0 | if (VK_SUCCESS == result) { |
206 | 0 | *backendMemory = (VulkanBackendMemory)allocation; |
207 | 0 | } |
208 | | |
209 | 0 | return result; |
210 | 0 | } Unexecuted instantiation: skgpu::VulkanAMDMemoryAllocator::allocateBufferMemory(VkBuffer_T*, skgpu::VulkanMemoryAllocator::BufferUsage, unsigned int, long*) |
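// [Editorial sketch, not part of the Skia source] Tying the kCpuWritesGpuReads
// coherency comment above to this class's own API: a hypothetical caller that
// writes through a mapped allocation and flushes only when the memory turned out
// to be non-coherent. The helper name, handle, and source buffer are assumptions
// for illustration; the allocator methods are the ones defined in this file.
//
// #include <cstring>  // memcpy
void uploadToMappedAlloc(skgpu::VulkanMemoryAllocator* allocator,
                         const skgpu::VulkanBackendMemory& handle,
                         const void* src, size_t size) {
    void* mapped = nullptr;
    if (allocator->mapMemory(handle, &mapped) != VK_SUCCESS) {
        return;
    }
    memcpy(mapped, src, size);
    skgpu::VulkanAlloc alloc;
    allocator->getAllocInfo(handle, &alloc);
    if (alloc.fFlags & skgpu::VulkanAlloc::kNoncoherent_Flag) {
        // Non-coherent memory: flush so the cpu writes become visible to the gpu.
        allocator->flushMemory(handle, 0, VK_WHOLE_SIZE);
    }
    allocator->unmapMemory(handle);
}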
211 | | |
212 | 0 | void VulkanAMDMemoryAllocator::freeMemory(const VulkanBackendMemory& memoryHandle) { |
213 | 0 | TRACE_EVENT0("skia.gpu", TRACE_FUNC); |
214 | 0 | const VmaAllocation allocation = (VmaAllocation)memoryHandle; |
215 | 0 | vmaFreeMemory(fAllocator, allocation); |
216 | 0 | } |
217 | | |
218 | | void VulkanAMDMemoryAllocator::getAllocInfo(const VulkanBackendMemory& memoryHandle, |
219 | 0 | VulkanAlloc* alloc) const { |
220 | 0 | const VmaAllocation allocation = (VmaAllocation)memoryHandle; |
221 | 0 | VmaAllocationInfo vmaInfo; |
222 | 0 | vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo); |
223 | | |
224 | 0 | VkMemoryPropertyFlags memFlags; |
225 | 0 | vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags); |
226 | | |
227 | 0 | uint32_t flags = 0; |
228 | 0 | if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) { |
229 | 0 | flags |= VulkanAlloc::kMappable_Flag; |
230 | 0 | } |
231 | 0 | if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) { |
232 | 0 | flags |= VulkanAlloc::kNoncoherent_Flag; |
233 | 0 | } |
234 | 0 | if (VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT & memFlags) { |
235 | 0 | flags |= VulkanAlloc::kLazilyAllocated_Flag; |
236 | 0 | } |
237 | | |
238 | 0 | alloc->fMemory = vmaInfo.deviceMemory; |
239 | 0 | alloc->fOffset = vmaInfo.offset; |
240 | 0 | alloc->fSize = vmaInfo.size; |
241 | 0 | alloc->fFlags = flags; |
242 | 0 | alloc->fBackendMemory = memoryHandle; |
243 | 0 | } |
244 | | |
245 | | VkResult VulkanAMDMemoryAllocator::mapMemory(const VulkanBackendMemory& memoryHandle, |
246 | 0 | void** data) { |
247 | 0 | TRACE_EVENT0("skia.gpu", TRACE_FUNC); |
248 | 0 | const VmaAllocation allocation = (VmaAllocation)memoryHandle; |
249 | 0 | return vmaMapMemory(fAllocator, allocation, data); |
250 | 0 | } |
251 | | |
252 | 0 | void VulkanAMDMemoryAllocator::unmapMemory(const VulkanBackendMemory& memoryHandle) { |
253 | 0 | TRACE_EVENT0("skia.gpu", TRACE_FUNC); |
254 | 0 | const VmaAllocation allocation = (VmaAllocation)memoryHandle; |
255 | 0 | vmaUnmapMemory(fAllocator, allocation); |
256 | 0 | } |
257 | | |
258 | | VkResult VulkanAMDMemoryAllocator::flushMemory(const VulkanBackendMemory& memoryHandle, |
259 | 0 | VkDeviceSize offset, VkDeviceSize size) { |
260 | 0 | TRACE_EVENT0("skia.gpu", TRACE_FUNC); |
261 | 0 | const VmaAllocation allocation = (VmaAllocation)memoryHandle; |
262 | 0 | return vmaFlushAllocation(fAllocator, allocation, offset, size); |
263 | 0 | } |
264 | | |
265 | | VkResult VulkanAMDMemoryAllocator::invalidateMemory(const VulkanBackendMemory& memoryHandle, |
266 | 0 | VkDeviceSize offset, VkDeviceSize size) { |
267 | 0 | TRACE_EVENT0("skia.gpu", TRACE_FUNC); |
268 | 0 | const VmaAllocation allocation = (VmaAllocation)memoryHandle; |
269 | 0 | return vmaInvalidateAllocation(fAllocator, allocation, offset, size); |
270 | 0 | } |
271 | | |
272 | 0 | std::pair<uint64_t, uint64_t> VulkanAMDMemoryAllocator::totalAllocatedAndUsedMemory() const { |
273 | 0 | VmaTotalStatistics stats; |
274 | 0 | vmaCalculateStatistics(fAllocator, &stats); |
275 | 0 | return {stats.total.statistics.blockBytes, stats.total.statistics.allocationBytes}; |
276 | 0 | } |
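// [Editorial note; field meanings summarized from VMA's documentation as an
// assumption] blockBytes counts all VkDeviceMemory allocated in blocks, while
// allocationBytes counts only the bytes occupied by live allocations, so the
// difference is block space that is reserved but currently unused. A hypothetical
// caller:
//
auto [allocatedBytes, usedBytes] = allocator->totalAllocatedAndUsedMemory();
uint64_t reservedUnused = allocatedBytes - usedBytes;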
277 | | |
278 | | #endif // SK_USE_VMA |
279 | | |
280 | | } // namespace skgpu |