mirror of
				https://github.com/dolphin-emu/dolphin.git
				synced 2025-10-25 01:19:19 +00:00 
			
		
		
		
	This could cause the assertion on line 212 to fail when uploading large amounts of data in between command buffer executions.
		
			
				
	
	
		
			347 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			347 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2016 Dolphin Emulator Project
 | |
| // Licensed under GPLv2+
 | |
| // Refer to the license.txt file included.
 | |
| 
 | |
| #include "VideoBackends/Vulkan/StreamBuffer.h"
 | |
| 
 | |
| #include <algorithm>
 | |
| #include <cstdint>
 | |
| #include <functional>
 | |
| 
 | |
| #include "Common/Assert.h"
 | |
| #include "Common/MsgHandler.h"
 | |
| 
 | |
| #include "VideoBackends/Vulkan/CommandBufferManager.h"
 | |
| #include "VideoBackends/Vulkan/Util.h"
 | |
| #include "VideoBackends/Vulkan/VulkanContext.h"
 | |
| 
 | |
| namespace Vulkan
 | |
| {
 | |
| StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, size_t max_size)
 | |
|     : m_usage(usage), m_maximum_size(max_size)
 | |
| {
 | |
|   // Add a callback that fires on fence point creation and signal
 | |
|   g_command_buffer_mgr->AddFencePointCallback(
 | |
|       this, std::bind(&StreamBuffer::OnCommandBufferQueued, this, std::placeholders::_1,
 | |
|                       std::placeholders::_2),
 | |
|       std::bind(&StreamBuffer::OnCommandBufferExecuted, this, std::placeholders::_1));
 | |
| }
 | |
| 
 | |
| StreamBuffer::~StreamBuffer()
 | |
| {
 | |
|   g_command_buffer_mgr->RemoveFencePointCallback(this);
 | |
| 
 | |
|   if (m_host_pointer)
 | |
|     vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
 | |
| 
 | |
|   if (m_buffer != VK_NULL_HANDLE)
 | |
|     g_command_buffer_mgr->DeferBufferDestruction(m_buffer);
 | |
|   if (m_memory != VK_NULL_HANDLE)
 | |
|     g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_memory);
 | |
| }
 | |
| 
 | |
| std::unique_ptr<StreamBuffer> StreamBuffer::Create(VkBufferUsageFlags usage, size_t initial_size,
 | |
|                                                    size_t max_size)
 | |
| {
 | |
|   std::unique_ptr<StreamBuffer> buffer = std::make_unique<StreamBuffer>(usage, max_size);
 | |
|   if (!buffer->ResizeBuffer(initial_size))
 | |
|     return nullptr;
 | |
| 
 | |
|   return buffer;
 | |
| }
 | |
| 
 | |
| bool StreamBuffer::ResizeBuffer(size_t size)
 | |
| {
 | |
|   // Create the buffer descriptor
 | |
|   VkBufferCreateInfo buffer_create_info = {
 | |
|       VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,  // VkStructureType        sType
 | |
|       nullptr,                               // const void*            pNext
 | |
|       0,                                     // VkBufferCreateFlags    flags
 | |
|       static_cast<VkDeviceSize>(size),       // VkDeviceSize           size
 | |
|       m_usage,                               // VkBufferUsageFlags     usage
 | |
|       VK_SHARING_MODE_EXCLUSIVE,             // VkSharingMode          sharingMode
 | |
|       0,                                     // uint32_t               queueFamilyIndexCount
 | |
|       nullptr                                // const uint32_t*        pQueueFamilyIndices
 | |
|   };
 | |
| 
 | |
|   VkBuffer buffer = VK_NULL_HANDLE;
 | |
|   VkResult res =
 | |
|       vkCreateBuffer(g_vulkan_context->GetDevice(), &buffer_create_info, nullptr, &buffer);
 | |
|   if (res != VK_SUCCESS)
 | |
|   {
 | |
|     LOG_VULKAN_ERROR(res, "vkCreateBuffer failed: ");
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // Get memory requirements (types etc) for this buffer
 | |
|   VkMemoryRequirements memory_requirements;
 | |
|   vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements);
 | |
| 
 | |
|   // Aim for a coherent mapping if possible.
 | |
|   u32 memory_type_index = g_vulkan_context->GetUploadMemoryType(memory_requirements.memoryTypeBits,
 | |
|                                                                 &m_coherent_mapping);
 | |
| 
 | |
|   // Allocate memory for backing this buffer
 | |
|   VkMemoryAllocateInfo memory_allocate_info = {
 | |
|       VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,  // VkStructureType    sType
 | |
|       nullptr,                                 // const void*        pNext
 | |
|       memory_requirements.size,                // VkDeviceSize       allocationSize
 | |
|       memory_type_index                        // uint32_t           memoryTypeIndex
 | |
|   };
 | |
|   VkDeviceMemory memory = VK_NULL_HANDLE;
 | |
|   res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_allocate_info, nullptr, &memory);
 | |
|   if (res != VK_SUCCESS)
 | |
|   {
 | |
|     LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: ");
 | |
|     vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // Bind memory to buffer
 | |
|   res = vkBindBufferMemory(g_vulkan_context->GetDevice(), buffer, memory, 0);
 | |
|   if (res != VK_SUCCESS)
 | |
|   {
 | |
|     LOG_VULKAN_ERROR(res, "vkBindBufferMemory failed: ");
 | |
|     vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
 | |
|     vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr);
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // Map this buffer into user-space
 | |
|   void* mapped_ptr = nullptr;
 | |
|   res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, size, 0, &mapped_ptr);
 | |
|   if (res != VK_SUCCESS)
 | |
|   {
 | |
|     LOG_VULKAN_ERROR(res, "vkMapMemory failed: ");
 | |
|     vkDestroyBuffer(g_vulkan_context->GetDevice(), buffer, nullptr);
 | |
|     vkFreeMemory(g_vulkan_context->GetDevice(), memory, nullptr);
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // Unmap current host pointer (if there was a previous buffer)
 | |
|   if (m_host_pointer)
 | |
|     vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory);
 | |
| 
 | |
|   // Destroy the backings for the buffer after the command buffer executes
 | |
|   if (m_buffer != VK_NULL_HANDLE)
 | |
|     g_command_buffer_mgr->DeferBufferDestruction(m_buffer);
 | |
|   if (m_memory != VK_NULL_HANDLE)
 | |
|     g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_memory);
 | |
| 
 | |
|   // Replace with the new buffer
 | |
|   m_buffer = buffer;
 | |
|   m_memory = memory;
 | |
|   m_host_pointer = reinterpret_cast<u8*>(mapped_ptr);
 | |
|   m_current_size = size;
 | |
|   m_current_offset = 0;
 | |
|   m_current_gpu_position = 0;
 | |
|   m_tracked_fences.clear();
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_reuse /* = true */,
 | |
|                                  bool allow_growth /* = true */,
 | |
|                                  bool reallocate_if_full /* = false */)
 | |
| {
 | |
|   size_t required_bytes = num_bytes + alignment;
 | |
| 
 | |
|   // Check for sane allocations
 | |
|   if (required_bytes > m_maximum_size)
 | |
|   {
 | |
|     PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer",
 | |
|                static_cast<uint32_t>(num_bytes), static_cast<uint32_t>(m_maximum_size));
 | |
| 
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // Is the GPU behind or up to date with our current offset?
 | |
|   if (m_current_offset >= m_current_gpu_position)
 | |
|   {
 | |
|     size_t remaining_bytes = m_current_size - m_current_offset;
 | |
|     if (required_bytes <= remaining_bytes)
 | |
|     {
 | |
|       // Place at the current position, after the GPU position.
 | |
|       m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment);
 | |
|       m_last_allocation_size = num_bytes;
 | |
|       return true;
 | |
|     }
 | |
| 
 | |
|     // Check for space at the start of the buffer
 | |
|     // We use < here because we don't want to have the case of m_current_offset ==
 | |
|     // m_current_gpu_position. That would mean the code above would assume the
 | |
|     // GPU has caught up to us, which it hasn't.
 | |
|     if (allow_reuse && required_bytes < m_current_gpu_position)
 | |
|     {
 | |
|       // Reset offset to zero, since we're allocating behind the gpu now
 | |
|       m_current_offset = 0;
 | |
|       m_last_allocation_size = num_bytes;
 | |
|       return true;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Is the GPU ahead of our current offset?
 | |
|   if (m_current_offset < m_current_gpu_position)
 | |
|   {
 | |
|     // We have from m_current_offset..m_current_gpu_position space to use.
 | |
|     size_t remaining_bytes = m_current_gpu_position - m_current_offset;
 | |
|     if (required_bytes < remaining_bytes)
 | |
|     {
 | |
|       // Place at the current position, since this is still behind the GPU.
 | |
|       m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment);
 | |
|       m_last_allocation_size = num_bytes;
 | |
|       return true;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Try to grow the buffer up to the maximum size before waiting.
 | |
|   // Double each time until the maximum size is reached.
 | |
|   if (allow_growth && m_current_size < m_maximum_size)
 | |
|   {
 | |
|     size_t new_size = std::min(std::max(num_bytes, m_current_size * 2), m_maximum_size);
 | |
|     if (ResizeBuffer(new_size))
 | |
|     {
 | |
|       // Allocating from the start of the buffer.
 | |
|       m_last_allocation_size = new_size;
 | |
|       return true;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Can we find a fence to wait on that will give us enough memory?
 | |
|   if (allow_reuse && WaitForClearSpace(required_bytes))
 | |
|   {
 | |
|     _assert_(m_current_offset == m_current_gpu_position ||
 | |
|              (m_current_offset + required_bytes) < m_current_gpu_position);
 | |
|     m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment);
 | |
|     m_last_allocation_size = num_bytes;
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   // If we are not allowed to execute in our current state (e.g. in the middle of a render pass),
 | |
|   // as a last resort, reallocate the buffer. This will incur a performance hit and is not
 | |
|   // encouraged.
 | |
|   if (reallocate_if_full && ResizeBuffer(m_current_size))
 | |
|   {
 | |
|     m_last_allocation_size = num_bytes;
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   // We tried everything we could, and still couldn't get anything. If we're not at a point
 | |
|   // where the state is known and can be resumed, this is probably a fatal error.
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| void StreamBuffer::CommitMemory(size_t final_num_bytes)
 | |
| {
 | |
|   _assert_((m_current_offset + final_num_bytes) <= m_current_size);
 | |
|   _assert_(final_num_bytes <= m_last_allocation_size);
 | |
| 
 | |
|   // For non-coherent mappings, flush the memory range
 | |
|   if (!m_coherent_mapping)
 | |
|   {
 | |
|     VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, nullptr, m_memory,
 | |
|                                  m_current_offset, final_num_bytes};
 | |
|     vkFlushMappedMemoryRanges(g_vulkan_context->GetDevice(), 1, &range);
 | |
|   }
 | |
| 
 | |
|   m_current_offset += final_num_bytes;
 | |
| }
 | |
| 
 | |
| void StreamBuffer::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence)
 | |
| {
 | |
|   // Don't create a tracking entry if the GPU is caught up with the buffer.
 | |
|   if (m_current_offset == m_current_gpu_position)
 | |
|     return;
 | |
| 
 | |
|   // Has the offset changed since the last fence?
 | |
|   if (!m_tracked_fences.empty() && m_tracked_fences.back().second == m_current_offset)
 | |
|   {
 | |
|     // No need to track the new fence, the old one is sufficient.
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   m_tracked_fences.emplace_back(fence, m_current_offset);
 | |
| }
 | |
| 
 | |
| void StreamBuffer::OnCommandBufferExecuted(VkFence fence)
 | |
| {
 | |
|   // Locate the entry for this fence (if any, we may have been forced to wait already)
 | |
|   auto iter = std::find_if(m_tracked_fences.begin(), m_tracked_fences.end(),
 | |
|                            [fence](const auto& it) { return it.first == fence; });
 | |
| 
 | |
|   if (iter != m_tracked_fences.end())
 | |
|   {
 | |
|     // Update the GPU position, and remove any fences before this fence (since
 | |
|     // it is implied that they have been signaled as well, though the callback
 | |
|     // should have removed them already).
 | |
|     m_current_gpu_position = iter->second;
 | |
|     m_tracked_fences.erase(m_tracked_fences.begin(), ++iter);
 | |
|   }
 | |
| }
 | |
| 
 | |
| bool StreamBuffer::WaitForClearSpace(size_t num_bytes)
 | |
| {
 | |
|   size_t new_offset = 0;
 | |
|   size_t new_gpu_position = 0;
 | |
|   auto iter = m_tracked_fences.begin();
 | |
|   for (; iter != m_tracked_fences.end(); iter++)
 | |
|   {
 | |
|     // Would this fence bring us in line with the GPU?
 | |
|     // This is the "last resort" case, where a command buffer execution has been forced
 | |
|     // after no additional data has been written to it, so we can assume that after the
 | |
|     // fence has been signaled the entire buffer is now consumed.
 | |
|     size_t gpu_position = iter->second;
 | |
|     if (m_current_offset == gpu_position)
 | |
|     {
 | |
|       // Start at the start of the buffer again.
 | |
|       new_offset = 0;
 | |
|       new_gpu_position = 0;
 | |
|       break;
 | |
|     }
 | |
| 
 | |
|     // Assuming that we wait for this fence, are we allocating in front of the GPU?
 | |
|     if (m_current_offset > gpu_position)
 | |
|     {
 | |
|       // We can wrap around to the start, behind the GPU, if there is enough space.
 | |
|       // We use > here because otherwise we'd end up lining up with the GPU, and then the
 | |
|       // allocator would assume that the GPU has consumed what we just wrote.
 | |
|       if (gpu_position > num_bytes)
 | |
|       {
 | |
|         new_offset = 0;
 | |
|         new_gpu_position = gpu_position;
 | |
|         break;
 | |
|       }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       // We're currently allocating behind the GPU. This would give us between the current
 | |
|       // offset and the GPU position worth of space to work with. Again, > because we can't
 | |
|       // align the GPU position with the buffer offset.
 | |
|       size_t available_space_inbetween = gpu_position - m_current_offset;
 | |
|       if (available_space_inbetween > num_bytes)
 | |
|       {
 | |
|         // Leave the offset as-is, but update the GPU position.
 | |
|         new_offset = m_current_offset;
 | |
|         new_gpu_position = gpu_position;
 | |
|         break;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Did any fences satisfy this condition?
 | |
|   if (iter == m_tracked_fences.end())
 | |
|     return false;
 | |
| 
 | |
|   // Wait until this fence is signaled.
 | |
|   VkResult res =
 | |
|       vkWaitForFences(g_vulkan_context->GetDevice(), 1, &iter->first, VK_TRUE, UINT64_MAX);
 | |
|   if (res != VK_SUCCESS)
 | |
|     LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
 | |
| 
 | |
|   // Update GPU position, and remove all fences up to (and including) this fence.
 | |
|   m_current_offset = new_offset;
 | |
|   m_current_gpu_position = new_gpu_position;
 | |
|   m_tracked_fences.erase(m_tracked_fences.begin(), ++iter);
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| }  // namespace Vulkan
 |