rsx: Avoid on-the-fly ZCULL allocations with unordered_map

Authored by kd-11 on 2022-09-07 22:18:59 +03:00; committed by kd-11
parent 60fc90bb8e
commit cd53bb7eff
4 changed files with 54 additions and 35 deletions
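In outline: the ZCULL unit previously tracked per-tag statistics in a std::unordered_map<u32, query_stat_counter>, which heap-allocates whenever a new statistics tag is opened mid-frame. The commit replaces the map with a fixed std::array of max_stat_registers (8192) slots treated as a ring buffer, with a flags field marking slots that still have pending writes. A minimal standalone sketch of that pattern, using hypothetical names (stat_counter, registers, advance_tag) rather than the actual members changed below:

#include <array>
#include <cstdint>

// Sketch of the allocation-free ring of statistics registers this commit
// adopts. stat_counter, registers and advance_tag are illustrative
// stand-ins for query_stat_counter, m_statistics_map and the real logic.
struct stat_counter
{
	uint32_t result = 0;
	uint32_t flags = 0; // non-zero while a pending report still references the slot
};

constexpr uint32_t register_count = 8192; // mirrors max_stat_registers

std::array<stat_counter, register_count> registers{};
uint32_t current_tag = 0;

void advance_tag()
{
	// Only advance if the current slot was actually used; wrap modulo the
	// register count instead of allocating a new map entry.
	if (registers[current_tag].flags)
	{
		current_tag = (current_tag + 1) % register_count;
		registers[current_tag].result = 0; // clear before reuse
	}
}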


@@ -308,7 +308,7 @@ void GLGSRender::on_init_thread()
 	}

 	//Occlusion query
-	for (u32 i = 0; i < occlusion_query_count; ++i)
+	for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
 	{
 		GLuint handle = 0;
 		auto &query = m_occlusion_query_data[i];
@@ -484,7 +484,7 @@ void GLGSRender::on_exit()
 	m_shader_interpreter.destroy();

-	for (u32 i = 0; i < occlusion_query_count; ++i)
+	for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
 	{
 		auto &query = m_occlusion_query_data[i];
 		query.active = false;

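Both GL changes above are mechanical renames: occlusion_query_count moves from a protected const member of ZCULL_control into an enum in the rsx::reports namespace (see the header diff further down), so backends must now qualify it. A side benefit is that enumerators are compile-time constants, which is what lets the header use max_stat_registers as a std::array size; a small illustration of the idea, trimmed to the two constants the backends touch:

#include <array>

namespace rsx::reports
{
	// Trimmed-down version of the enum added by this commit.
	enum constants
	{
		occlusion_query_count = 2048,
		max_stat_registers = 8192
	};
}

// Enumerators are integral constant expressions, so unlike the old
// `const u32` instance members they can size arrays at compile time
// and be referenced without a ZCULL_control object:
std::array<int, rsx::reports::max_stat_registers> storage{};
static_assert(rsx::reports::occlusion_query_count == 2048);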

@@ -11,6 +11,11 @@ namespace rsx
 		{
 			m_free_occlusion_pool.push(&query);
 		}
+
+		for (auto& stat : m_statistics_map)
+		{
+			stat.flags = stat.result = 0;
+		}
 	}

 	ZCULL_control::~ZCULL_control()
@@ -157,6 +162,8 @@ namespace rsx
 		}

 		auto forwarder = &m_pending_writes.back();
+		m_statistics_map[m_statistics_tag_id].flags |= 1;
+
 		for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); It++)
 		{
 			if (!It->sink)
@@ -272,8 +279,26 @@ namespace rsx
 			m_pending_writes.resize(valid_size);
 		}

-		m_statistics_tag_id++;
-		m_statistics_map[m_statistics_tag_id] = {};
+		if (m_pending_writes.empty())
+		{
+			// Clear can be invoked from flip as a workaround to prevent query leakage.
+			m_statistics_map[m_statistics_tag_id].flags = 0;
+		}
+
+		if (m_statistics_map[m_statistics_tag_id].flags)
+		{
+			m_statistics_tag_id = (m_statistics_tag_id + 1) % max_stat_registers;
+
+			auto data = m_statistics_map.data() + m_statistics_tag_id;
+			if (data->flags != 0)
+			{
+				// This shouldn't happen
+				rsx_log.error("Allocating a new ZCULL statistics slot %u overwrites previous data.", m_statistics_tag_id);
+			}
+
+			// Clear value before use
+			data->result = 0;
+		}
 	}

 	void ZCULL_control::on_draw()
@@ -462,13 +487,17 @@ namespace rsx
 			}
 		}

-		//Delete all statistics caches but leave the current one
-		for (auto It = m_statistics_map.begin(); It != m_statistics_map.end(); )
+		// Delete all statistics caches but leave the current one
+		const u32 current_index = m_statistics_tag_id;
+		for (u32 index = current_index - 1; index != current_index;)
 		{
-			if (It->first == m_statistics_tag_id)
-				++It;
-			else
-				It = m_statistics_map.erase(It);
+			if (m_statistics_map[index].flags == 0)
+			{
+				break;
+			}
+
+			m_statistics_map[index].flags = 0;
+			index = (index + max_stat_registers - 1) % max_stat_registers;
 		}

 		//Decrement jobs counter
@@ -534,22 +563,12 @@ namespace rsx
 			}
 		}

-		u32 stat_tag_to_remove = m_statistics_tag_id;
 		u32 processed = 0;

 		for (auto& writer : m_pending_writes)
 		{
 			if (!writer.sink)
 				break;

-			if (writer.counter_tag != stat_tag_to_remove &&
-				stat_tag_to_remove != m_statistics_tag_id)
-			{
-				//If the stat id is different from this stat id and the queue is advancing,
-				//its guaranteed that the previous tag has no remaining writes as the queue is ordered
-				m_statistics_map.erase(stat_tag_to_remove);
-				stat_tag_to_remove = m_statistics_tag_id;
-			}
-
 			auto query = writer.query;
 			auto& counter = m_statistics_map[writer.counter_tag];
@@ -586,15 +605,13 @@ namespace rsx
 				free_query(query);
 			}

-			stat_tag_to_remove = writer.counter_tag;
+			// Release the stat tag for this object. Slots are all or nothing.
+			m_statistics_map[writer.counter_tag].flags = 0;

 			retire(ptimer, &writer, counter.result);
 			processed++;
 		}

-		if (stat_tag_to_remove != m_statistics_tag_id)
-			m_statistics_map.erase(stat_tag_to_remove);
-
 		if (processed)
 		{
 			auto remaining = m_pending_writes.size() - processed;

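Taken together, the hunks above implement a simple slot protocol on the flags field: queuing a report write pins the current register (flags |= 1), retiring the write releases it (flags = 0), and bulk cleanup walks backwards from the current register, releasing pinned slots until it reaches a free one, which works because slots are pinned in allocation order. A condensed sketch of that lifecycle with illustrative free-function names (the real logic lives in the ZCULL_control member functions above); note the sketch uses a fully modular initial index, a conservative reading of the loop in the diff:

#include <array>
#include <cstdint>

constexpr uint32_t register_count = 8192; // mirrors max_stat_registers

struct stat_counter { uint32_t result; uint32_t flags; };
std::array<stat_counter, register_count> registers{};

// 1. Queuing a report write pins the slot (cf. flags |= 1 in the diff).
void pin(uint32_t tag) { registers[tag].flags |= 1; }

// 2. Retiring the write releases it; slots are all or nothing.
void release(uint32_t tag) { registers[tag].flags = 0; }

// 3. Bulk cleanup: walk backwards from the slot preceding `current`,
// stopping at the first free slot, since slots are pinned in order.
void release_all_before(uint32_t current)
{
	for (uint32_t index = (current + register_count - 1) % register_count;
	     index != current;
	     index = (index + register_count - 1) % register_count)
	{
		if (registers[index].flags == 0)
			break;
		registers[index].flags = 0;
	}
}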

@@ -62,7 +62,7 @@ namespace rsx
 	struct query_stat_counter
 	{
 		u32 result;
-		u32 reserved;
+		u32 flags;
 	};

 	struct sync_hint_payload_t
@@ -84,6 +84,15 @@ namespace rsx
 		sync_no_notify = 2 // If set, backend hint notifications will not be made
 	};

+	enum constants
+	{
+		max_zcull_delay_us = 300, // Delay before a report update operation is forced to retire
+		min_zcull_tick_us = 100, // Default tick duration. To avoid hardware spam, we schedule peeks in multiples of this.
+		occlusion_query_count = 2048, // Number of occlusion query slots available. Real hardware actually has far fewer units before choking
+		max_safe_queue_depth = 1792, // Number of in-flight queries before we start forcefully flushing data from the GPU device.
+		max_stat_registers = 8192 // Size of the statistics cache
+	};
+
 	class ZCULL_control
 	{
 	private:
@@ -97,13 +106,6 @@ namespace rsx
 		void disable_optimizations(class ::rsx::thread* ptimer, u32 location);

 	protected:
-		// Delay before a report update operation is forced to retire
-		const u32 max_zcull_delay_us = 300;
-		const u32 min_zcull_tick_us = 100;
-
-		// Number of occlusion query slots available. Real hardware actually has far fewer units before choking
-		const u32 occlusion_query_count = 2048;
-		const u32 max_safe_queue_depth = 1792;

 		bool unit_enabled = false; // The ZCULL unit is on
 		bool write_enabled = false; // A surface in the ZCULL-monitored tile region has been loaded for rasterization
@@ -126,7 +128,7 @@ namespace rsx
 		u64 m_timer = 0;

 		std::vector<queued_report_write> m_pending_writes{};
-		std::unordered_map<u32, query_stat_counter> m_statistics_map{};
+		std::array<query_stat_counter, max_stat_registers> m_statistics_map{};

 		// Enables/disables the ZCULL unit
 		void set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue);

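Sizing note for the new member: query_stat_counter packs two u32 fields, so the array costs 8192 × 8 bytes = 64 KiB of fixed storage per ZCULL_control instance, traded against the elimination of per-tag heap allocation and rehashing. A quick compile-time check of that arithmetic (assumes the usual no-padding layout for two u32 fields):

#include <cstdint>

struct query_stat_counter
{
	uint32_t result;
	uint32_t flags;
};

// Two u32 fields pack to 8 bytes on common ABIs; 8192 of them is 64 KiB.
static_assert(sizeof(query_stat_counter) == 8);
static_assert(8192 * sizeof(query_stat_counter) == 64 * 1024);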

@@ -420,9 +420,9 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
 	//Occlusion
 	m_occlusion_query_manager = std::make_unique<vk::query_pool_manager>(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE);
-	m_occlusion_map.resize(occlusion_query_count);
+	m_occlusion_map.resize(rsx::reports::occlusion_query_count);

-	for (u32 n = 0; n < occlusion_query_count; ++n)
+	for (u32 n = 0; n < rsx::reports::occlusion_query_count; ++n)
 		m_occlusion_query_data[n].driver_handle = n;

 	if (g_cfg.video.precise_zpass_count)