From f3593427211a1e860048497ae06ea1462740c18d Mon Sep 17 00:00:00 2001
From: kd-11 <karokidii@gmail.com>
Date: Sat, 9 Nov 2019 16:14:21 +0300
Subject: [PATCH] rsx: Implement mutable ring buffers with grow support

---
 rpcs3/Emu/RSX/Common/ring_buffer_helper.h | 18 ++++++---
 rpcs3/Emu/RSX/VK/VKGSRender.cpp           |  2 +-
 rpcs3/Emu/RSX/VK/VKHelpers.cpp            | 45 +++++++++++++++++++++++
 rpcs3/Emu/RSX/VK/VKHelpers.h              | 14 +++++--
 4 files changed, 69 insertions(+), 10 deletions(-)
diff --git a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h
index ccabc24dfc..9da7abca70 100644
--- a/rpcs3/Emu/RSX/Common/ring_buffer_helper.h
+++ b/rpcs3/Emu/RSX/Common/ring_buffer_helper.h
@@ -10,8 +10,9 @@
  * Space between GET and PUT is used by the GPU ; this structure check that this memory is not overwritten.
  * User has to update the GET pointer when synchronisation happens.
  */
-struct data_heap
+class data_heap
 {
+protected:
 	/**
 	* Does alloc cross get position ?
 	*/
@@ -43,6 +44,13 @@ struct data_heap
 		}
 	}
 
+	// Grow the buffer so that it can hold at least `size` bytes
+	virtual bool grow(size_t size)
+	{
+		// Stub: this base implementation never grows. alloc() throws if can_alloc() fails and this returns false
+		return false;
+	}
+
 	size_t m_size;
 	size_t m_put_pos; // Start of free space
 	size_t m_min_guard_size; //If an allocation touches the guard region, reset the heap to avoid going over budget
@@ -75,15 +83,15 @@ public:
 	template<int Alignment>
 	size_t alloc(size_t size)
 	{
-		if (!can_alloc<Alignment>(size))
+		const size_t alloc_size = align(size, Alignment);
+		const size_t aligned_put_pos = align(m_put_pos, Alignment);
+
+		if (!can_alloc<Alignment>(size) && !grow(aligned_put_pos + alloc_size))
 		{
 			fmt::throw_exception("[%s] Working buffer not big enough, buffer_length=%d allocated=%d requested=%d guard=%d largest_pool=%d" HERE,
 					m_name, m_size, m_current_allocated_size, size, m_min_guard_size, m_largest_allocated_pool);
 		}
 
-		size_t alloc_size = align(size, Alignment);
-		size_t aligned_put_pos = align(m_put_pos, Alignment);
-
 		const size_t block_length = (aligned_put_pos - m_put_pos) + alloc_size;
 		m_current_allocated_size += block_length;
 		m_largest_allocated_pool = std::max(m_largest_allocated_pool, block_length);
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index 8fe090ac3d..080d76e5ab 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -2832,7 +2832,7 @@ void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait
 	rsx::g_dma_manager.sync();
 
 	// TODO: Better check for shadowed memory
-	if (m_attrib_ring_info.shadow)
+	//if (m_attrib_ring_info.shadow)
 	{
 		if (m_attrib_ring_info.dirty() ||
 			m_fragment_env_ring_info.dirty() ||
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp
index 52f80ccdea..6b794d4990 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp
@@ -127,6 +127,51 @@ namespace vk
 #endif
 	}
 
+	bool data_heap::grow(size_t size) // Returns true if a larger backing buffer was created, false if growth was refused
+	{
+		// Create new heap. New capacity is m_size + size, aligned up by 64 MiB, and must stay below 1 GiB
+		const size_t size_limit = 1024 * 0x100000; // 1024 * 1 MiB = 1 GiB
+		const size_t aligned_new_size = align(m_size + size, 64 * 0x100000); // 64 MiB granularity
+
+		if (aligned_new_size >= size_limit)
+		{
+			// Too large. Note that a result of exactly 1 GiB is rejected as well
+			return false;
+		}
+
+		if (shadow)
+		{
+			// Shadowed. Growing this can be messy as it requires double allocation (macOS only)
+			return false;
+		}
+
+		// Wait for DMA activity to end
+		rsx::g_dma_manager.sync();
+
+		if (mapped)
+		{
+			// Force reset mapping
+			unmap(true);
+		}
+
+		VkBufferUsageFlags usage = heap->info.usage; // Captured so the replacement buffer is created with the same usage flags
+
+		const auto device = get_current_renderer();
+		const auto& memory_map = device->get_memory_mapping();
+
+		VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+		auto memory_index = memory_map.host_visible_coherent;
+
+		// Update heap information and reset the allocator, keeping the existing name and guard size
+		::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
+
+		// Discard old heap and create a new one (nothing is copied across here). Old heap will be garbage collected when no longer needed
+		get_resource_manager()->dispose(heap);
+		heap = std::make_unique<buffer>(*device, aligned_new_size, memory_index, memory_flags, usage, 0);
+
+		return true;
+	}
+
 	memory_type_mapping get_memory_mapping(const vk::physical_device& dev)
 	{
 		VkPhysicalDevice pdev = dev;
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index bc84e91b51..2fff476b55 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -109,7 +109,7 @@ namespace vk
 	class image;
 	struct image_view;
 	struct buffer;
-	struct data_heap;
+	class data_heap;
 	class mem_allocator_base;
 	struct memory_type_mapping;
 	struct gpu_formats_support;
@@ -3384,15 +3384,21 @@ public:
 		};
 	}
 
-	struct data_heap : public ::data_heap
+	class data_heap : public ::data_heap
 	{
-		std::unique_ptr<buffer> heap;
+	private:
 		bool mapped = false;
 		void *_ptr = nullptr;
 
 		std::unique_ptr<buffer> shadow;
 		std::vector<VkBufferCopy> dirty_ranges;
 
+	protected:
+		bool grow(size_t size) override;
+
+	public:
+		std::unique_ptr<buffer> heap;
+
 		// NOTE: Some drivers (RADV) use heavyweight OS map/unmap routines that are insanely slow
 		// Avoid mapping/unmapping to keep these drivers from stalling
 		// NOTE2: HOST_CACHED flag does not keep the mapped ptr around in the driver either
@@ -3402,7 +3408,7 @@ public:
 			::data_heap::init(size, name, guard);
 
 			const auto device = get_current_renderer();
-			const auto memory_map = device->get_memory_mapping();
+			const auto& memory_map = device->get_memory_mapping();
 
 			VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 			auto memory_index = memory_map.host_visible_coherent;