rsx: Synchronization improvements

- Always flush the primary queue and wait if not invoking readback from the rsx thread
-- Should fix some instances of device_lost when using WCB
-- Marked remaining case as TODO
-- TODO: optimize the amount of time rsx waits for external threads trying to read
kd-11 2018-01-15 22:28:25 +03:00
parent cbc8bf01a1
commit 9ec2337192
3 changed files with 154 additions and 127 deletions
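The core of the change is the new flush_request_task handshake: a non-RSX thread that faults on protected memory posts a flush request and spins in producer_wait() until the RSX thread has submitted and flushed its primary command buffer, while the RSX thread, after servicing the request in do_local_task(), spins in consumer_wait() until every waiter has dropped its reference. Below is a minimal standalone sketch of that protocol; the thread bodies, main() and the yield-based spinning are illustrative placeholders, not RPCS3 code (the real task also carries a hard_sync flag, uses _mm_pause loops, and is posted under m_flush_queue_mutex).

// Standalone model of the request/acknowledge handshake added by this commit.
// Only the protocol is reproduced; names mirror the diff, everything else is a placeholder.
#include <atomic>
#include <cstdio>
#include <thread>

struct flush_request_task
{
    std::atomic<bool> pending_state{ false }; // set by the faulting thread, cleared by the RSX thread
    std::atomic<int>  num_waiters{ 0 };       // faulting threads still reading back flushed data

    void post()               { pending_state = true; ++num_waiters; }
    bool pending() const      { return pending_state.load(); }
    void clear_pending_flag() { pending_state = false; }
    void remove_one()         { --num_waiters; }

    // Faulting thread: block until the RSX thread has flushed the command queue
    void producer_wait() const { while (pending_state.load()) std::this_thread::yield(); }
    // RSX thread: block until every waiter has finished its readback
    void consumer_wait() const { while (num_waiters.load() != 0) std::this_thread::yield(); }
};

int main()
{
    flush_request_task flush_requests;
    std::atomic<bool> stop{ false };

    // Stand-in for the RSX thread's do_local_task() loop
    std::thread rsx([&]
    {
        while (!stop.load())
        {
            if (flush_requests.pending())
            {
                std::puts("rsx: flush_command_queue()");
                flush_requests.clear_pending_flag(); // releases producer_wait()
                flush_requests.consumer_wait();      // wait for readers before touching buffers again
            }
            std::this_thread::yield();
        }
    });

    // Stand-in for a non-RSX thread handling on_access_violation()
    flush_requests.post();
    flush_requests.producer_wait();
    std::puts("reader: command queue flushed, performing readback");
    flush_requests.remove_one();

    stop = true;
    rsx.join();
}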


@@ -811,7 +811,13 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
if (!is_rsxthr)
{
//Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
vm::temporary_unlock();
std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
m_flush_requests.post(sync_timestamp == 0ull);
has_queue_ref = true;
}
else
{
@@ -821,67 +827,36 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
if (sync_timestamp > 0)
{
//Wait for any cb submitted after the sync timestamp to finish
while (true)
//Wait for earliest cb submitted after the sync timestamp to finish
command_buffer_chunk *target_cb = nullptr;
for (auto &cb : m_primary_cb_list)
{
u32 pending = 0;
if (m_last_flushable_cb < 0)
break;
for (auto &cb : m_primary_cb_list)
if (cb.pending && cb.last_sync >= sync_timestamp)
{
if (!cb.pending && cb.last_sync >= sync_timestamp)
{
pending = 0;
break;
}
if (cb.pending)
{
pending++;
if (is_rsxthr)
cb.poke();
}
if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
target_cb = &cb;
}
if (!pending)
break;
std::this_thread::yield();
}
if (target_cb)
target_cb->wait();
if (is_rsxthr)
m_last_flushable_cb = -1;
}
else
if (has_queue_ref)
{
if (!is_rsxthr)
{
{
std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
m_flush_commands = true;
m_queued_threads++;
}
//Wait for the RSX thread to process
while (m_flush_commands)
{
_mm_lfence();
_mm_pause();
}
has_queue_ref = true;
}
//Wait for the RSX thread to process request if it hasn't already
m_flush_requests.producer_wait();
}
m_texture_cache.flush_all(result, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
if (has_queue_ref)
{
m_queued_threads--;
//Release RSX thread
m_flush_requests.remove_one();
}
}
@@ -1855,7 +1830,7 @@ void VKGSRender::flush_command_queue(bool hard_sync)
}
m_last_flushable_cb = -1;
m_flush_commands = false;
m_flush_requests.clear_pending_flag();
}
else
{
@@ -2037,15 +2012,7 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
void VKGSRender::do_local_task(bool idle)
{
//TODO: Guard this
if (m_overlay_cleanup_requests.size())
{
flush_command_queue(true);
m_ui_renderer->remove_temp_resources();
m_overlay_cleanup_requests.clear();
}
if (m_flush_commands)
if (m_flush_requests.pending())
{
std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
@@ -2053,12 +2020,8 @@ void VKGSRender::do_local_task(bool idle)
//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
flush_command_queue();
m_flush_commands = false;
while (m_queued_threads)
{
_mm_lfence();
_mm_pause();
}
m_flush_requests.clear_pending_flag();
m_flush_requests.consumer_wait();
}
if (m_last_flushable_cb > -1)
@@ -2151,7 +2114,14 @@ void VKGSRender::do_local_task(bool idle)
#endif
if (m_custom_ui)
//TODO: Guard this
if (m_overlay_cleanup_requests.size())
{
flush_command_queue(true);
m_ui_renderer->remove_temp_resources();
m_overlay_cleanup_requests.clear();
}
else if (m_custom_ui)
{
if (!in_begin_end && native_ui_flip_request.load())
{


@@ -43,6 +43,7 @@ struct command_buffer_chunk: public vk::command_buffer
std::atomic_bool pending = { false };
std::atomic<u64> last_sync = { 0 };
std::mutex guard_mutex;
command_buffer_chunk()
{}
@@ -84,8 +85,13 @@ struct command_buffer_chunk: public vk::command_buffer
{
if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
{
vkResetFences(m_device, 1, &submit_fence);
pending = false;
std::lock_guard<std::mutex> lock(guard_mutex);
if (pending)
{
vkResetFences(m_device, 1, &submit_fence);
pending = false;
}
}
return !pending;
@@ -93,6 +99,8 @@ struct command_buffer_chunk: public vk::command_buffer
void wait()
{
std::lock_guard<std::mutex> lock(guard_mutex);
if (!pending)
return;
@@ -116,6 +124,114 @@ struct occlusion_data
command_buffer_chunk* command_buffer_to_wait = nullptr;
};
struct frame_context_t
{
VkSemaphore present_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;
std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
std::vector<std::unique_ptr<vk::sampler>> samplers_to_clean;
u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
//Heap pointers
s64 attrib_heap_ptr = 0;
s64 ubo_heap_ptr = 0;
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
u64 last_frame_sync_time = 0;
//Copy shareable information
void grab_resources(frame_context_t &other)
{
present_semaphore = other.present_semaphore;
descriptor_set = other.descriptor_set;
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
attrib_heap_ptr = other.attrib_heap_ptr;
ubo_heap_ptr = other.attrib_heap_ptr;
index_heap_ptr = other.attrib_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
}
//Exchange storage (non-copyable)
void swap_storage(frame_context_t &other)
{
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
std::swap(samplers_to_clean, other.samplers_to_clean);
}
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc)
{
attrib_heap_ptr = attrib_loc;
ubo_heap_ptr = ubo_loc;
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
last_frame_sync_time = get_system_time();
}
void reset_heap_ptrs()
{
last_frame_sync_time = 0;
}
};
struct flush_request_task
{
atomic_t<bool> pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request
atomic_t<int> num_waiters{ 0 }; //Number of threads waiting for this request to be serviced
bool hard_sync = false;
flush_request_task(){}
void post(bool _hard_sync)
{
hard_sync = (hard_sync || _hard_sync);
pending_state = true;
num_waiters++;
}
void remove_one()
{
num_waiters--;
}
void clear_pending_flag()
{
hard_sync = false;
pending_state.store(false);
}
bool pending() const
{
return pending_state.load();
}
void consumer_wait() const
{
while (num_waiters.load() != 0)
{
_mm_lfence();
_mm_pause();
}
}
void producer_wait() const
{
while (pending_state.load())
{
_mm_lfence();
_mm_pause();
}
}
};
class VKGSRender : public GSRender
{
private:
@@ -191,64 +307,6 @@ private:
vk::vk_data_heap m_index_buffer_ring_info;
vk::vk_data_heap m_texture_upload_buffer_ring_info;
struct frame_context_t
{
VkSemaphore present_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;
std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
std::vector<std::unique_ptr<vk::sampler>> samplers_to_clean;
u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
//Heap pointers
s64 attrib_heap_ptr = 0;
s64 ubo_heap_ptr = 0;
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
u64 last_frame_sync_time = 0;
//Copy shareable information
void grab_resources(frame_context_t &other)
{
present_semaphore = other.present_semaphore;
descriptor_set = other.descriptor_set;
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
attrib_heap_ptr = other.attrib_heap_ptr;
ubo_heap_ptr = other.attrib_heap_ptr;
index_heap_ptr = other.attrib_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
}
//Exchange storage (non-copyable)
void swap_storage(frame_context_t &other)
{
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
std::swap(samplers_to_clean, other.samplers_to_clean);
}
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc)
{
attrib_heap_ptr = attrib_loc;
ubo_heap_ptr = ubo_loc;
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
last_frame_sync_time = get_system_time();
}
void reset_heap_ptrs()
{
last_frame_sync_time = 0;
}
};
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage
frame_context_t m_aux_frame_context;
@@ -277,8 +335,7 @@ private:
std::atomic<int> m_last_flushable_cb = {-1 };
std::mutex m_flush_queue_mutex;
std::atomic<bool> m_flush_commands = { false };
std::atomic<int> m_queued_threads = { 0 };
flush_request_task m_flush_requests;
std::thread::id rsx_thread;
std::atomic<u64> m_last_sync_event = { 0 };


@@ -13,8 +13,8 @@ namespace vk
VkSampler g_null_sampler = nullptr;
bool g_cb_no_interrupt_flag = false;
bool g_drv_no_primitive_restart_flag = false;
atomic_t<bool> g_cb_no_interrupt_flag { false };
atomic_t<bool> g_drv_no_primitive_restart_flag { false };
u64 g_num_processed_frames = 0;
u64 g_num_total_frames = 0;