From cd53bb7effb79613eb23d957272af3c0895775b7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 7 Sep 2022 22:18:59 +0300 Subject: [PATCH] rsx: Avoid on-the-fly ZCULL allocations with unordered_map --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 4 +-- rpcs3/Emu/RSX/RSXZCULL.cpp | 61 +++++++++++++++++++++------------ rpcs3/Emu/RSX/RSXZCULL.h | 20 ++++++----- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 4 +-- 4 files changed, 54 insertions(+), 35 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 37f62418d5..e09e8baea3 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -308,7 +308,7 @@ void GLGSRender::on_init_thread() } //Occlusion query - for (u32 i = 0; i < occlusion_query_count; ++i) + for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i) { GLuint handle = 0; auto &query = m_occlusion_query_data[i]; @@ -484,7 +484,7 @@ void GLGSRender::on_exit() m_shader_interpreter.destroy(); - for (u32 i = 0; i < occlusion_query_count; ++i) + for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i) { auto &query = m_occlusion_query_data[i]; query.active = false; diff --git a/rpcs3/Emu/RSX/RSXZCULL.cpp b/rpcs3/Emu/RSX/RSXZCULL.cpp index defcfe765d..5180b953e3 100644 --- a/rpcs3/Emu/RSX/RSXZCULL.cpp +++ b/rpcs3/Emu/RSX/RSXZCULL.cpp @@ -11,6 +11,11 @@ namespace rsx { m_free_occlusion_pool.push(&query); } + + for (auto& stat : m_statistics_map) + { + stat.flags = stat.result = 0; + } } ZCULL_control::~ZCULL_control() @@ -157,6 +162,8 @@ namespace rsx } auto forwarder = &m_pending_writes.back(); + m_statistics_map[m_statistics_tag_id].flags |= 1; + for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); It++) { if (!It->sink) @@ -272,8 +279,26 @@ namespace rsx m_pending_writes.resize(valid_size); } - m_statistics_tag_id++; - m_statistics_map[m_statistics_tag_id] = {}; + if (m_pending_writes.empty()) + { + // Clear can be invoked from flip as a workaround to prevent query leakage. 
+ m_statistics_map[m_statistics_tag_id].flags = 0; + } + + if (m_statistics_map[m_statistics_tag_id].flags) + { + m_statistics_tag_id = (m_statistics_tag_id + 1) % max_stat_registers; + auto data = m_statistics_map.data() + m_statistics_tag_id; + + if (data->flags != 0) + { + // This shouldn't happen + rsx_log.error("Allocating a new ZCULL statistics slot %u overwrites previous data.", m_statistics_tag_id); + } + + // Clear value before use + data->result = 0; + } } void ZCULL_control::on_draw() @@ -462,13 +487,17 @@ namespace rsx } } - //Delete all statistics caches but leave the current one - for (auto It = m_statistics_map.begin(); It != m_statistics_map.end(); ) + // Delete all statistics caches but leave the current one (walk backwards from the previous slot, wrapping slot 0 to the last register so the u32 index never underflows) + const u32 current_index = m_statistics_tag_id; + for (u32 index = (current_index + max_stat_registers - 1) % max_stat_registers; index != current_index;) { - if (It->first == m_statistics_tag_id) - ++It; - else - It = m_statistics_map.erase(It); + if (m_statistics_map[index].flags == 0) + { + break; + } + + m_statistics_map[index].flags = 0; + index = (index + max_stat_registers - 1) % max_stat_registers; } //Decrement jobs counter @@ -534,22 +563,12 @@ namespace rsx } } - u32 stat_tag_to_remove = m_statistics_tag_id; u32 processed = 0; for (auto& writer : m_pending_writes) { if (!writer.sink) break; - if (writer.counter_tag != stat_tag_to_remove && - stat_tag_to_remove != m_statistics_tag_id) - { - //If the stat id is different from this stat id and the queue is advancing, - //its guaranteed that the previous tag has no remaining writes as the queue is ordered - m_statistics_map.erase(stat_tag_to_remove); - stat_tag_to_remove = m_statistics_tag_id; - } - auto query = writer.query; auto& counter = m_statistics_map[writer.counter_tag]; @@ -586,15 +605,13 @@ namespace rsx free_query(query); } - stat_tag_to_remove = writer.counter_tag; + // Release the stat tag for this object. Slots are all or nothing. 
+ m_statistics_map[writer.counter_tag].flags = 0; retire(ptimer, &writer, counter.result); processed++; } - if (stat_tag_to_remove != m_statistics_tag_id) - m_statistics_map.erase(stat_tag_to_remove); - if (processed) { auto remaining = m_pending_writes.size() - processed; diff --git a/rpcs3/Emu/RSX/RSXZCULL.h b/rpcs3/Emu/RSX/RSXZCULL.h index 0eb65eac3e..b7271fcde0 100644 --- a/rpcs3/Emu/RSX/RSXZCULL.h +++ b/rpcs3/Emu/RSX/RSXZCULL.h @@ -62,7 +62,7 @@ namespace rsx struct query_stat_counter { u32 result; - u32 reserved; + u32 flags; }; struct sync_hint_payload_t @@ -84,6 +84,15 @@ namespace rsx sync_no_notify = 2 // If set, backend hint notifications will not be made }; + enum constants + { + max_zcull_delay_us = 300, // Delay before a report update operation is forced to retire + min_zcull_tick_us = 100, // Default tick duration. To avoid hardware spam, we schedule peeks in multiples of this. + occlusion_query_count = 2048, // Number of occlusion query slots available. Real hardware actually has far fewer units before choking + max_safe_queue_depth = 1792, // Number of in-flight queries before we start forcefully flushing data from the GPU device. + max_stat_registers = 8192 // Size of the statistics cache + }; + class ZCULL_control { private: @@ -97,13 +106,6 @@ namespace rsx void disable_optimizations(class ::rsx::thread* ptimer, u32 location); protected: - // Delay before a report update operation is forced to retire - const u32 max_zcull_delay_us = 300; - const u32 min_zcull_tick_us = 100; - - // Number of occlusion query slots available. 
Real hardware actually has far fewer units before choking - const u32 occlusion_query_count = 2048; - const u32 max_safe_queue_depth = 1792; bool unit_enabled = false; // The ZCULL unit is on bool write_enabled = false; // A surface in the ZCULL-monitored tile region has been loaded for rasterization @@ -126,7 +128,7 @@ namespace rsx u64 m_timer = 0; std::vector m_pending_writes{}; - std::unordered_map m_statistics_map{}; + std::array m_statistics_map{}; // Enables/disables the ZCULL unit void set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 25f5363dcf..c2f7c5cf8b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -420,9 +420,9 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) //Occlusion m_occlusion_query_manager = std::make_unique(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE); - m_occlusion_map.resize(occlusion_query_count); + m_occlusion_map.resize(rsx::reports::occlusion_query_count); - for (u32 n = 0; n < occlusion_query_count; ++n) + for (u32 n = 0; n < rsx::reports::occlusion_query_count; ++n) m_occlusion_query_data[n].driver_handle = n; if (g_cfg.video.precise_zpass_count)