rsx: Avoid on-the-fly ZCULL allocations with unordered_map

Authored by kd-11 on 2022-09-07 22:18:59 +03:00; committed by kd-11
parent 60fc90bb8e
commit cd53bb7eff
4 changed files with 54 additions and 35 deletions
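In outline: the ZCULL unit previously tracked per-tag statistics in a std::unordered_map<u32, query_stat_counter>, which heap-allocates whenever a new statistics tag is opened mid-frame. The commit replaces the map with a fixed std::array of max_stat_registers (8192) slots treated as a ring buffer, with a flags field marking slots that still have pending writes. A minimal standalone sketch of that pattern, using hypothetical names (stat_counter, registers, advance_tag) rather than the actual members changed below:

#include <array>
#include <cstdint>

// Sketch of the allocation-free ring of statistics registers this commit
// adopts. stat_counter, registers and advance_tag are illustrative
// stand-ins for query_stat_counter, m_statistics_map and the real logic.
struct stat_counter
{
	uint32_t result = 0;
	uint32_t flags = 0; // non-zero while a pending report still references the slot
};

constexpr uint32_t register_count = 8192; // mirrors max_stat_registers

std::array<stat_counter, register_count> registers{};
uint32_t current_tag = 0;

void advance_tag()
{
	// Only advance if the current slot was actually used; wrap modulo the
	// register count instead of allocating a new map entry.
	if (registers[current_tag].flags)
	{
		current_tag = (current_tag + 1) % register_count;
		registers[current_tag].result = 0; // clear before reuse
	}
}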


@@ -308,7 +308,7 @@ void GLGSRender::on_init_thread()
 	}

 	//Occlusion query
-	for (u32 i = 0; i < occlusion_query_count; ++i)
+	for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
 	{
 		GLuint handle = 0;
 		auto &query = m_occlusion_query_data[i];
@@ -484,7 +484,7 @@ void GLGSRender::on_exit()
 	m_shader_interpreter.destroy();

-	for (u32 i = 0; i < occlusion_query_count; ++i)
+	for (u32 i = 0; i < rsx::reports::occlusion_query_count; ++i)
 	{
 		auto &query = m_occlusion_query_data[i];
 		query.active = false;

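Both GL changes above are mechanical renames: occlusion_query_count moves from a protected const member of ZCULL_control into an enum in the rsx::reports namespace (see the header diff further down), so backends must now qualify it. A side benefit is that enumerators are compile-time constants, which is what lets the header use max_stat_registers as a std::array size; a small illustration of the idea, trimmed to the two constants the backends touch:

#include <array>

namespace rsx::reports
{
	// Trimmed-down version of the enum added by this commit.
	enum constants
	{
		occlusion_query_count = 2048,
		max_stat_registers = 8192
	};
}

// Enumerators are integral constant expressions, so unlike the old
// `const u32` instance members they can size arrays at compile time
// and be referenced without a ZCULL_control object:
std::array<int, rsx::reports::max_stat_registers> storage{};
static_assert(rsx::reports::occlusion_query_count == 2048);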

@@ -11,6 +11,11 @@ namespace rsx
 		{
 			m_free_occlusion_pool.push(&query);
 		}
+
+		for (auto& stat : m_statistics_map)
+		{
+			stat.flags = stat.result = 0;
+		}
 	}

 	ZCULL_control::~ZCULL_control()
@@ -157,6 +162,8 @@ namespace rsx
 		}

 		auto forwarder = &m_pending_writes.back();
+		m_statistics_map[m_statistics_tag_id].flags |= 1;
+
 		for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); It++)
 		{
 			if (!It->sink)
@@ -272,8 +279,26 @@ namespace rsx
 			m_pending_writes.resize(valid_size);
 		}

-		m_statistics_tag_id++;
-		m_statistics_map[m_statistics_tag_id] = {};
+		if (m_pending_writes.empty())
+		{
+			// Clear can be invoked from flip as a workaround to prevent query leakage.
+			m_statistics_map[m_statistics_tag_id].flags = 0;
+		}
+
+		if (m_statistics_map[m_statistics_tag_id].flags)
+		{
+			m_statistics_tag_id = (m_statistics_tag_id + 1) % max_stat_registers;
+
+			auto data = m_statistics_map.data() + m_statistics_tag_id;
+			if (data->flags != 0)
+			{
+				// This shouldn't happen
+				rsx_log.error("Allocating a new ZCULL statistics slot %u overwrites previous data.", m_statistics_tag_id);
+			}
+
+			// Clear value before use
+			data->result = 0;
+		}
 	}

 	void ZCULL_control::on_draw()
@@ -462,13 +487,17 @@ namespace rsx
 			}
 		}

-		//Delete all statistics caches but leave the current one
-		for (auto It = m_statistics_map.begin(); It != m_statistics_map.end(); )
+		// Delete all statistics caches but leave the current one
+		const u32 current_index = m_statistics_tag_id;
+		for (u32 index = current_index - 1; index != current_index;)
 		{
-			if (It->first == m_statistics_tag_id)
-				++It;
-			else
-				It = m_statistics_map.erase(It);
+			if (m_statistics_map[index].flags == 0)
+			{
+				break;
+			}
+
+			m_statistics_map[index].flags = 0;
+			index = (index + max_stat_registers - 1) % max_stat_registers;
 		}

 		//Decrement jobs counter
@@ -534,22 +563,12 @@ namespace rsx
 			}
 		}

-		u32 stat_tag_to_remove = m_statistics_tag_id;
 		u32 processed = 0;

 		for (auto& writer : m_pending_writes)
 		{
 			if (!writer.sink)
 				break;

-			if (writer.counter_tag != stat_tag_to_remove &&
-				stat_tag_to_remove != m_statistics_tag_id)
-			{
-				//If the stat id is different from this stat id and the queue is advancing,
-				//its guaranteed that the previous tag has no remaining writes as the queue is ordered
-				m_statistics_map.erase(stat_tag_to_remove);
-				stat_tag_to_remove = m_statistics_tag_id;
-			}
-
 			auto query = writer.query;
 			auto& counter = m_statistics_map[writer.counter_tag];
@@ -586,15 +605,13 @@ namespace rsx
 				free_query(query);
 			}

-			stat_tag_to_remove = writer.counter_tag;
+			// Release the stat tag for this object. Slots are all or nothing.
+			m_statistics_map[writer.counter_tag].flags = 0;

 			retire(ptimer, &writer, counter.result);
 			processed++;
 		}

-		if (stat_tag_to_remove != m_statistics_tag_id)
-			m_statistics_map.erase(stat_tag_to_remove);
-
 		if (processed)
 		{
 			auto remaining = m_pending_writes.size() - processed;

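Taken together, the hunks above implement a simple slot protocol on the flags field: queuing a report write pins the current register (flags |= 1), retiring the write releases it (flags = 0), and bulk cleanup walks backwards from the current register, releasing pinned slots until it reaches a free one, which works because slots are pinned in allocation order. A condensed sketch of that lifecycle with illustrative free-function names (the real logic lives in the ZCULL_control member functions above); note the sketch uses a fully modular initial index, a conservative reading of the loop in the diff:

#include <array>
#include <cstdint>

constexpr uint32_t register_count = 8192; // mirrors max_stat_registers

struct stat_counter { uint32_t result; uint32_t flags; };
std::array<stat_counter, register_count> registers{};

// 1. Queuing a report write pins the slot (cf. flags |= 1 in the diff).
void pin(uint32_t tag) { registers[tag].flags |= 1; }

// 2. Retiring the write releases it; slots are all or nothing.
void release(uint32_t tag) { registers[tag].flags = 0; }

// 3. Bulk cleanup: walk backwards from the slot preceding `current`,
// stopping at the first free slot, since slots are pinned in order.
void release_all_before(uint32_t current)
{
	for (uint32_t index = (current + register_count - 1) % register_count;
	     index != current;
	     index = (index + register_count - 1) % register_count)
	{
		if (registers[index].flags == 0)
			break;
		registers[index].flags = 0;
	}
}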

@@ -62,7 +62,7 @@ namespace rsx
 	struct query_stat_counter
 	{
 		u32 result;
-		u32 reserved;
+		u32 flags;
 	};

 	struct sync_hint_payload_t
@@ -84,6 +84,15 @@ namespace rsx
 		sync_no_notify = 2 // If set, backend hint notifications will not be made
 	};

+	enum constants
+	{
+		max_zcull_delay_us = 300, // Delay before a report update operation is forced to retire
+		min_zcull_tick_us = 100, // Default tick duration. To avoid hardware spam, we schedule peeks in multiples of this.
+		occlusion_query_count = 2048, // Number of occlusion query slots available. Real hardware actually has far fewer units before choking
+		max_safe_queue_depth = 1792, // Number of in-flight queries before we start forcefully flushing data from the GPU device.
+		max_stat_registers = 8192 // Size of the statistics cache
+	};
+
 	class ZCULL_control
 	{
 	private:
@@ -97,13 +106,6 @@ namespace rsx
 		void disable_optimizations(class ::rsx::thread* ptimer, u32 location);

 	protected:
-		// Delay before a report update operation is forced to retire
-		const u32 max_zcull_delay_us = 300;
-		const u32 min_zcull_tick_us = 100;
-
-		// Number of occlusion query slots available. Real hardware actually has far fewer units before choking
-		const u32 occlusion_query_count = 2048;
-		const u32 max_safe_queue_depth = 1792;

 		bool unit_enabled = false; // The ZCULL unit is on
 		bool write_enabled = false; // A surface in the ZCULL-monitored tile region has been loaded for rasterization
@@ -126,7 +128,7 @@ namespace rsx
 		u64 m_timer = 0;

 		std::vector<queued_report_write> m_pending_writes{};
-		std::unordered_map<u32, query_stat_counter> m_statistics_map{};
+		std::array<query_stat_counter, max_stat_registers> m_statistics_map{};

 		// Enables/disables the ZCULL unit
 		void set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue);

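Sizing note for the new member: query_stat_counter packs two u32 fields, so the array costs 8192 × 8 bytes = 64 KiB of fixed storage per ZCULL_control instance, traded against the elimination of per-tag heap allocation and rehashing. A quick compile-time check of that arithmetic (assumes the usual no-padding layout for two u32 fields):

#include <cstdint>

struct query_stat_counter
{
	uint32_t result;
	uint32_t flags;
};

// Two u32 fields pack to 8 bytes on common ABIs; 8192 of them is 64 KiB.
static_assert(sizeof(query_stat_counter) == 8);
static_assert(8192 * sizeof(query_stat_counter) == 64 * 1024);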

@@ -420,9 +420,9 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
 	//Occlusion
 	m_occlusion_query_manager = std::make_unique<vk::query_pool_manager>(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE);
-	m_occlusion_map.resize(occlusion_query_count);
+	m_occlusion_map.resize(rsx::reports::occlusion_query_count);

-	for (u32 n = 0; n < occlusion_query_count; ++n)
+	for (u32 n = 0; n < rsx::reports::occlusion_query_count; ++n)
 		m_occlusion_query_data[n].driver_handle = n;

 	if (g_cfg.video.precise_zpass_count)