From f3593427211a1e860048497ae06ea1462740c18d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 9 Nov 2019 16:14:21 +0300 Subject: [PATCH] rsx: Implement mutable ring buffers with grow support --- rpcs3/Emu/RSX/Common/ring_buffer_helper.h | 18 ++++++--- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 2 +- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 45 +++++++++++++++++++++++ rpcs3/Emu/RSX/VK/VKHelpers.h | 14 +++++-- 4 files changed, 69 insertions(+), 10 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h index ccabc24dfc..9da7abca70 100644 --- a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h +++ b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h @@ -10,8 +10,9 @@ * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten. * User has to update the GET pointer when synchronisation happens. */ -struct data_heap +class data_heap { +protected: /** * Does alloc cross get position ? */ @@ -43,6 +44,13 @@ struct data_heap } } + // Grow the buffer to hold at least size bytes + virtual bool grow(size_t size) + { + // Stub + return false; + } + size_t m_size; size_t m_put_pos; // Start of free space size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget @@ -75,15 +83,15 @@ public: template size_t alloc(size_t size) { - if (!can_alloc(size)) + const size_t alloc_size = align(size, Alignment); + const size_t aligned_put_pos = align(m_put_pos, Alignment); + + if (!can_alloc(size) && !grow(aligned_put_pos + alloc_size)) { fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE, m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool); } - size_t alloc_size = align(size, Alignment); - size_t aligned_put_pos = align(m_put_pos, Alignment); - const size_t block_length = (aligned_put_pos - m_put_pos) + alloc_size; m_current_allocated_size += block_length; m_largest_allocated_pool = std::max(m_largest_allocated_pool, block_length); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8fe090ac3d..080d76e5ab 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2832,7 +2832,7 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait rsx::g_dma_manager.sync(); // TODO: Better check for shadowed memory - if (m_attrib_ring_info.shadow) + //if (m_attrib_ring_info.shadow) { if (m_attrib_ring_info.dirty() || m_fragment_env_ring_info.dirty() || diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 52f80ccdea..6b794d4990 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -127,6 +127,51 @@ namespace vk #endif } + bool data_heap::grow(size_t size) + { + // Create new heap. All sizes are aligned up by 64M, upto 1GiB + const size_t size_limit = 1024 * 0x100000; + const size_t aligned_new_size = align(m_size + size, 64 * 0x100000); + + if (aligned_new_size >= size_limit) + { + // Too large + return false; + } + + if (shadow) + { + // Shadowed. Growing this can be messy as it requires double allocation (macOS only) + return false; + } + + // Wait for DMA activity to end + rsx::g_dma_manager.sync(); + + if (mapped) + { + // Force reset mapping + unmap(true); + } + + VkBufferUsageFlags usage = heap->info.usage; + + const auto device = get_current_renderer(); + const auto& memory_map = device->get_memory_mapping(); + + VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + auto memory_index = memory_map.host_visible_coherent; + + // Update heap information and reset the allocator + ::data_heap::init(aligned_new_size, m_name, m_min_guard_size); + + // Discard old heap and create a new one. Old heap will be garbage collected when no longer needed + get_resource_manager()->dispose(heap); + heap = std::make_unique(*device, aligned_new_size, memory_index, memory_flags, usage, 0); + + return true; + } + memory_type_mapping get_memory_mapping(const vk::physical_device& dev) { VkPhysicalDevice pdev = dev; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index bc84e91b51..2fff476b55 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -109,7 +109,7 @@ namespace vk class image; struct image_view; struct buffer; - struct data_heap; + class data_heap; class mem_allocator_base; struct memory_type_mapping; struct gpu_formats_support; @@ -3384,15 +3384,21 @@ public: }; } - struct data_heap : public ::data_heap + class data_heap : public ::data_heap { - std::unique_ptr heap; + private: bool mapped = false; void *_ptr = nullptr; std::unique_ptr shadow; std::vector dirty_ranges; + protected: + bool grow(size_t size) override; + + public: + std::unique_ptr heap; + // NOTE: Some drivers (RADV) use heavyweight OS map/unmap routines that are insanely slow // Avoid mapping/unmapping to keep these drivers from stalling // NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either @@ -3402,7 +3408,7 @@ public: ::data_heap::init(size, name, guard); const auto device = get_current_renderer(); - const auto memory_map = device->get_memory_mapping(); + const auto& memory_map = device->get_memory_mapping(); VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; auto memory_index = memory_map.host_visible_coherent;