Mirror of https://github.com/RPCS3/rpcs3.git
rsx: ZCULL rewrite and other improvements
- ZCULL unit emulation rewritten
- ZCULL reports are now deferred, avoiding pipeline stalls
- Minor optimizations; replaced std::mutex with shared_mutex where contention is rare
- Silence an unnecessary error message
- Small improvement to out-of-memory handling for Vulkan and slightly bump the vertex buffer heap
Parent commit: dece1e01f4
This commit:   315798b1f4
11 changed files with 697 additions and 321 deletions
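The core idea of the ZCULL rework is visible throughout the diff below: instead of reading occlusion results back the moment the guest asks for a report (stalling the pipeline), report writes are queued and retired later from the RSX thread. The following is a minimal, self-contained sketch of that pattern; the names (deferred_report_queue, pending_write, due_tick) are invented for illustration only — the real implementation is the ZCULL_control class added in RSXThread.h/RSXThread.cpp further down.

#include <cstdint>
#include <deque>
#include <functional>

// Hypothetical, simplified model of a deferred report queue. Each requested
// report write is recorded together with the tick at which it must retire;
// the queue is drained opportunistically from the main loop instead of
// stalling at the moment the guest requests the value.
struct pending_write
{
    uint32_t sink_address;               // where the result must eventually land
    uint32_t due_tick;                   // forced retirement deadline
    std::function<bool(uint32_t&)> poll; // returns true when a result is ready
};

class deferred_report_queue
{
    std::deque<pending_write> m_writes;
    uint32_t m_tick = 0;

public:
    void enqueue(pending_write w) { m_writes.push_back(std::move(w)); }

    // Called once per "cycle"; writes back entries whose results are ready or
    // whose deadline has passed, then stops at the first entry that is still busy.
    template <typename WriteFn>
    void update(WriteFn&& write_back)
    {
        ++m_tick;
        while (!m_writes.empty())
        {
            auto& w = m_writes.front();
            uint32_t value = 0;
            const bool ready = w.poll(value);
            if (!ready && m_tick < w.due_tick)
                break; // too early, try again next tick

            write_back(w.sink_address, value);
            m_writes.pop_front();
        }
    }

    bool has_pending() const { return !m_writes.empty(); }
};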
@@ -7,7 +7,7 @@
 
 void data_cache::store_and_protect_data(u64 key, u32 start, size_t size, u8 format, size_t w, size_t h, size_t d, size_t m, ComPtr<ID3D12Resource> data)
 {
-	std::lock_guard<std::mutex> lock(m_mut);
+	std::lock_guard<shared_mutex> lock(m_mut);
 	m_address_to_data[key] = std::make_pair(texture_entry(format, w, h, d, m), data);
 	protect_data(key, start, size);
 }
@@ -25,7 +25,7 @@ void data_cache::protect_data(u64 key, u32 start, size_t size)
 bool data_cache::invalidate_address(u32 addr)
 {
 	// In case 2 threads write to texture memory
-	std::lock_guard<std::mutex> lock(m_mut);
+	std::lock_guard<shared_mutex> lock(m_mut);
 	bool handled = false;
 	auto It = m_protected_ranges.begin(), E = m_protected_ranges.end();
 	for (; It != E;)
@@ -49,7 +49,7 @@ bool data_cache::invalidate_address(u32 addr)
 
 std::pair<texture_entry, ComPtr<ID3D12Resource> > *data_cache::find_data_if_available(u64 key)
 {
-	std::lock_guard<std::mutex> lock(m_mut);
+	std::lock_guard<shared_mutex> lock(m_mut);
 	auto It = m_address_to_data.find(key);
 	if (It == m_address_to_data.end())
 		return nullptr;
@@ -58,7 +58,7 @@ std::pair<texture_entry, ComPtr<ID3D12Resource> > *data_cache::find_data_if_avai
 
 void data_cache::unprotect_all()
 {
-	std::lock_guard<std::mutex> lock(m_mut);
+	std::lock_guard<shared_mutex> lock(m_mut);
 	for (auto &protectedTexture : m_protected_ranges)
 	{
 		u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture);
@@ -98,7 +98,7 @@ private:
 	* Memory protection fault catch can be generated by any thread and
 	* modifies it.
 	*/
-	std::mutex m_mut;
+	shared_mutex m_mut;
 
 	std::unordered_map<u64, std::pair<texture_entry, ComPtr<ID3D12Resource>> > m_address_to_data; // Storage
 	std::list <std::tuple<u64, u32, u32> > m_protected_ranges; // address, start of protected range, size of protected range
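Several files in this commit swap std::mutex for the project's shared_mutex (a reader-writer lock from Utilities/mutex.h). A hedged stand-in using std::shared_mutex shows the intent: writers still take an exclusive std::lock_guard exactly as in the hunks above, while read-mostly paths can use shared locks; the class below is illustrative only and is not RPCS3's actual cache.

#include <cstdint>
#include <mutex>
#include <shared_mutex>
#include <unordered_map>

// Illustrative stand-in: std::shared_mutex instead of RPCS3's own shared_mutex.
class protected_cache
{
    mutable std::shared_mutex m_mut;
    std::unordered_map<uint64_t, uint32_t> m_data;

public:
    void store(uint64_t key, uint32_t value)
    {
        std::lock_guard<std::shared_mutex> lock(m_mut); // exclusive, as in the diff above
        m_data[key] = value;
    }

    bool find(uint64_t key, uint32_t& out) const
    {
        std::shared_lock<std::shared_mutex> lock(m_mut); // readers do not block each other
        auto it = m_data.find(key);
        if (it == m_data.end())
            return false;
        out = it->second;
        return true;
    }
};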
@@ -209,7 +209,7 @@ void GLGSRender::end()
 {
 	std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
 
-	std::lock_guard<std::mutex> lock(m_sampler_mutex);
+	std::lock_guard<shared_mutex> lock(m_sampler_mutex);
 	void* unused = nullptr;
 	bool update_framebuffer_sourced = false;
 
@@ -598,6 +598,7 @@ void GLGSRender::set_viewport()
 void GLGSRender::on_init_thread()
 {
 	GSRender::on_init_thread();
+	zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this));
 
 	gl::init();
 
@@ -768,7 +769,7 @@ void GLGSRender::on_init_thread()
 	for (u32 i = 0; i < occlusion_query_count; ++i)
 	{
 		GLuint handle = 0;
-		auto &query = occlusion_query_data[i];
+		auto &query = m_occlusion_query_data[i];
 		glGenQueries(1, &handle);
 
 		query.driver_handle = (u64)handle;
@@ -853,6 +854,8 @@ void GLGSRender::on_init_thread()
 
 void GLGSRender::on_exit()
 {
+	zcull_ctrl.release();
+
 	m_prog_buffer.clear();
 
 	if (draw_fbo)
@@ -920,7 +923,7 @@ void GLGSRender::on_exit()
 
 	for (u32 i = 0; i < occlusion_query_count; ++i)
 	{
-		auto &query = occlusion_query_data[i];
+		auto &query = m_occlusion_query_data[i];
 		query.active = false;
 		query.pending = false;
 
@@ -1424,7 +1427,7 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
 		return false;
 
 	{
-		std::lock_guard<std::mutex> lock(m_sampler_mutex);
+		std::lock_guard<shared_mutex> lock(m_sampler_mutex);
 		m_samplers_dirty.store(true);
 	}
 
@@ -1452,7 +1455,7 @@ void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
 	{
 		m_gl_texture_cache.purge_dirty();
 		{
-			std::lock_guard<std::mutex> lock(m_sampler_mutex);
+			std::lock_guard<shared_mutex> lock(m_sampler_mutex);
 			m_samplers_dirty.store(true);
 		}
 	}
@@ -1464,7 +1467,7 @@ void GLGSRender::do_local_task(bool /*idle*/)
 
 	if (!work_queue.empty())
 	{
-		std::lock_guard<std::mutex> lock(queue_guard);
+		std::lock_guard<shared_mutex> lock(queue_guard);
 
 		work_queue.remove_if([](work_item &q) { return q.received; });
 
@@ -1505,7 +1508,7 @@ void GLGSRender::do_local_task(bool /*idle*/)
 
 work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data)
 {
-	std::lock_guard<std::mutex> lock(queue_guard);
+	std::lock_guard<shared_mutex> lock(queue_guard);
 
 	work_queue.emplace_back();
 	work_item &result = work_queue.back();
@@ -1537,31 +1540,38 @@ void GLGSRender::notify_tile_unbound(u32 tile)
 	//m_rtts.invalidate_surface_address(addr, false);
 }
 
-void GLGSRender::begin_occlusion_query(rsx::occlusion_query_info* query)
+void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
 {
 	query->result = 0;
 	glBeginQuery(GL_ANY_SAMPLES_PASSED, (GLuint)query->driver_handle);
 }
 
-void GLGSRender::end_occlusion_query(rsx::occlusion_query_info* query)
+void GLGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
 {
-	glEndQuery(GL_ANY_SAMPLES_PASSED);
+	if (query->num_draws)
+		glEndQuery(GL_ANY_SAMPLES_PASSED);
 }
 
-bool GLGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query)
+bool GLGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
 {
+	if (!query->num_draws)
+		return true;
+
 	GLint status = GL_TRUE;
 	glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT_AVAILABLE, &status);
 
 	return status != GL_FALSE;
 }
 
-void GLGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
+void GLGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
 {
-	GLint result;
-	glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result);
+	if (query->num_draws)
+	{
+		GLint result;
+		glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result);
 
-	query->result += result;
+		query->result += result;
+	}
 }
 
 void GLGSRender::shell_do_cleanup()
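The GL backend above maps each rsx::reports::occlusion_query_info onto a GL_ANY_SAMPLES_PASSED query and only reads results the driver reports as available. Below is a small sketch of that polling pattern, assuming an active GL 3.3+ context and a loader such as GLEW; the helper type is hypothetical and not part of the renderer.

#include <GL/glew.h> // any loader exposing core 3.3 query entry points works here

// Minimal sketch: the result is only read back once GL reports it available,
// so the calling thread never blocks inside glGetQueryObjectiv.
struct gl_occlusion_query
{
    GLuint handle = 0;

    void begin()
    {
        if (!handle)
            glGenQueries(1, &handle);
        glBeginQuery(GL_ANY_SAMPLES_PASSED, handle);
    }

    void end()
    {
        glEndQuery(GL_ANY_SAMPLES_PASSED);
    }

    // Non-blocking: returns false while the GPU has not produced the result yet.
    bool try_get_result(bool& any_samples_passed)
    {
        GLint available = GL_FALSE;
        glGetQueryObjectiv(handle, GL_QUERY_RESULT_AVAILABLE, &available);
        if (available == GL_FALSE)
            return false;

        GLint result = 0;
        glGetQueryObjectiv(handle, GL_QUERY_RESULT, &result);
        any_samples_passed = (result != 0);
        return true;
    }
};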
@@ -265,7 +265,7 @@ struct driver_state
 	}
 };
 
-class GLGSRender : public GSRender
+class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
 {
 private:
 	GLFragmentProgram m_fragment_prog;
@@ -311,7 +311,7 @@ private:
 
 	std::vector<u64> m_overlay_cleanup_requests;
 
-	std::mutex queue_guard;
+	shared_mutex queue_guard;
 	std::list<work_item> work_queue;
 
 	bool flush_draw_buffers = false;
@@ -327,7 +327,7 @@ private:
 	//vaos are mandatory for core profile
 	gl::vao m_vao;
 
-	std::mutex m_sampler_mutex;
+	shared_mutex m_sampler_mutex;
 	u64 surface_store_tag = 0;
 	std::atomic_bool m_samplers_dirty = {true};
 	std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
@@ -363,10 +363,10 @@ public:
 
 	bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;
 
-	void begin_occlusion_query(rsx::occlusion_query_info* query) override;
-	void end_occlusion_query(rsx::occlusion_query_info* query) override;
-	bool check_occlusion_query_status(rsx::occlusion_query_info* query) override;
-	void get_occlusion_query_result(rsx::occlusion_query_info* query) override;
+	void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
+	void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
+	bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
+	void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
 
 protected:
 	void begin() override;
@@ -318,8 +318,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 		}
 	}
 
-	if ((window_clip_width && window_clip_width != clip_horizontal) ||
-		(window_clip_height && window_clip_height != clip_vertical))
+	if ((window_clip_width && window_clip_width < clip_horizontal) ||
+		(window_clip_height && window_clip_height < clip_vertical))
 	{
 		LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d",
 			window_clip_width, window_clip_height, clip_horizontal, clip_vertical);
@@ -428,7 +428,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 	framebuffer_status_valid = draw_fbo.check();
 	if (!framebuffer_status_valid) return;
 
-	check_zcull_status(true, false);
+	check_zcull_status(true);
 	set_viewport();
 
 	switch (rsx::method_registers.surface_color_target())
@@ -349,8 +349,8 @@ namespace rsx
 
 		element_push_buffer.resize(0);
 
-		if (zcull_task_queue.active_query && zcull_task_queue.active_query->active)
-			zcull_task_queue.active_query->num_draws++;
+		if (zcull_ctrl->active)
+			zcull_ctrl->on_draw();
 
 		if (capture_current_frame)
 		{
@@ -365,6 +365,12 @@ namespace rsx
 
 		reset();
 
+		if (!zcull_ctrl)
+		{
+			//Backend did not provide an implementation, provide NULL object
+			zcull_ctrl = std::make_unique<::rsx::reports::ZCULL_control>();
+		}
+
 		last_flip_time = get_system_time() - 1000000;
 
 		thread_ctrl::spawn(m_vblank_thread, "VBlank Thread", [this]()
@@ -503,6 +509,9 @@ namespace rsx
 			//Execute backend-local tasks first
 			do_local_task(ctrl->put.load() == internal_get.load());
 
+			//Update sub-units
+			zcull_ctrl->update(this);
+
 			//Set up restore state if needed
 			if (sync_point_request)
 			{
@@ -1140,6 +1149,12 @@ namespace rsx
 
 	void thread::do_internal_task()
 	{
+		if (zcull_ctrl->has_pending())
+		{
+			zcull_ctrl->sync(this);
+			return;
+		}
+
 		if (m_internal_tasks.empty())
 		{
 			std::this_thread::yield();
@@ -1147,7 +1162,7 @@ namespace rsx
 		else
 		{
 			fmt::throw_exception("Disabled" HERE);
-			//std::lock_guard<std::mutex> lock{ m_mtx_task };
+			//std::lock_guard<shared_mutex> lock{ m_mtx_task };
 
 			//internal_task_entry &front = m_internal_tasks.front();
 
@@ -1161,7 +1176,7 @@ namespace rsx
 
 	//std::future<void> thread::add_internal_task(std::function<bool()> callback)
 	//{
-	//	std::lock_guard<std::mutex> lock{ m_mtx_task };
+	//	std::lock_guard<shared_mutex> lock{ m_mtx_task };
 	//	m_internal_tasks.emplace_back(callback);
 
 	//	return m_internal_tasks.back().promise.get_future();
@@ -2075,10 +2090,20 @@ namespace rsx
 			skip_frame = (m_skip_frame_ctr < 0);
 		}
 
+		//Reset zcull ctrl
+		zcull_ctrl->set_active(this, false);
+		zcull_ctrl->clear();
+
+		if (zcull_ctrl->has_pending())
+		{
+			LOG_ERROR(RSX, "Dangling reports found, discarding...");
+			zcull_ctrl->sync(this);
+		}
+
 		performance_counters.sampled_frames++;
 	}
 
-	void thread::check_zcull_status(bool framebuffer_swap, bool force_read)
+	void thread::check_zcull_status(bool framebuffer_swap)
 	{
 		if (g_cfg.video.disable_zcull_queries)
 			return;
 
@@ -2108,35 +2133,8 @@ namespace rsx
 			}
 		}
 
-		occlusion_query_info* query = nullptr;
-
-		if (zcull_task_queue.task_stack.size() > 0)
-			query = zcull_task_queue.active_query;
-
-		if (query && query->active)
-		{
-			if (force_read || (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active))
-			{
-				end_occlusion_query(query);
-				query->active = false;
-				query->pending = true;
-			}
-		}
-		else
-		{
-			if (zcull_rendering_enabled && testing_enabled && zcull_surface_active)
-			{
-				//Find query
-				u32 free_index = synchronize_zcull_stats();
-				query = &occlusion_query_data[free_index];
-				zcull_task_queue.add(query);
-
-				begin_occlusion_query(query);
-				query->active = true;
-				query->result = 0;
-				query->num_draws = 0;
-			}
-		}
+		zcull_ctrl->set_enabled(this, zcull_rendering_enabled);
+		zcull_ctrl->set_active(this, zcull_rendering_enabled && testing_enabled && zcull_surface_active);
 	}
 
 	void thread::clear_zcull_stats(u32 type)
@@ -2144,113 +2142,50 @@ namespace rsx
 		if (g_cfg.video.disable_zcull_queries)
 			return;
 
-		if (type == CELL_GCM_ZPASS_PIXEL_CNT)
-		{
-			if (zcull_task_queue.active_query &&
-				zcull_task_queue.active_query->active &&
-				zcull_task_queue.active_query->num_draws > 0)
-			{
-				//discard active query results
-				check_zcull_status(false, true);
-				zcull_task_queue.active_query->pending = false;
-
-				//re-enable cull stats if stats are enabled
-				check_zcull_status(false, false);
-				zcull_task_queue.active_query->num_draws = 0;
-			}
-
-			current_zcull_stats.clear();
-		}
-	}
-
-	u32 thread::get_zcull_stats(u32 type)
-	{
-		if (g_cfg.video.disable_zcull_queries)
-			return 0u;
-
-		if (zcull_task_queue.active_query &&
-			zcull_task_queue.active_query->active &&
-			current_zcull_stats.zpass_pixel_cnt == 0 &&
-			type == CELL_GCM_ZPASS_PIXEL_CNT)
-		{
-			//The zcull unit is still bound as the read is happening and there are no results ready
-			check_zcull_status(false, true); //close current query
-			check_zcull_status(false, false); //start new query since stat counting is still active
-		}
-
-		switch (type)
-		{
-		case CELL_GCM_ZPASS_PIXEL_CNT:
-		{
-			if (current_zcull_stats.zpass_pixel_cnt > 0)
-				return UINT16_MAX;
-
-			synchronize_zcull_stats(true);
-			return (current_zcull_stats.zpass_pixel_cnt > 0) ? UINT16_MAX : 0;
-		}
-		case CELL_GCM_ZCULL_STATS:
-		case CELL_GCM_ZCULL_STATS1:
-		case CELL_GCM_ZCULL_STATS2:
-			//TODO
-			return UINT16_MAX;
-		case CELL_GCM_ZCULL_STATS3:
-		{
-			//Some kind of inverse value
-			if (current_zcull_stats.zpass_pixel_cnt > 0)
-				return 0;
-
-			synchronize_zcull_stats(true);
-			return (current_zcull_stats.zpass_pixel_cnt > 0) ? 0 : UINT16_MAX;
-		}
-		default:
-			LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
-			return 0;
-		}
-	}
-
-	u32 thread::synchronize_zcull_stats(bool hard_sync)
-	{
-		if (!zcull_rendering_enabled || zcull_task_queue.pending == 0)
-			return 0;
-
-		u32 result = UINT16_MAX;
-
-		for (auto &query : zcull_task_queue.task_stack)
-		{
-			if (query == nullptr || query->active)
-				continue;
-
-			bool status = check_occlusion_query_status(query);
-			if (status == false && !hard_sync)
-				continue;
-
-			get_occlusion_query_result(query);
-			current_zcull_stats.zpass_pixel_cnt += query->result;
-
-			query->pending = false;
-			query = nullptr;
-			zcull_task_queue.pending--;
-		}
-
-		for (u32 i = 0; i < occlusion_query_count; ++i)
-		{
-			auto &query = occlusion_query_data[i];
-			if (!query.pending && !query.active)
-			{
-				result = i;
-				break;
-			}
-		}
-
-		if (result == UINT16_MAX && !hard_sync)
-			return synchronize_zcull_stats(true);
-
-		return result;
+		zcull_ctrl->clear();
+	}
+
+	void thread::get_zcull_stats(u32 type, vm::addr_t sink)
+	{
+		u32 value = 0;
+		if (!g_cfg.video.disable_zcull_queries)
+		{
+			switch (type)
+			{
+			case CELL_GCM_ZPASS_PIXEL_CNT:
+			{
+				zcull_ctrl->read_report(this, sink, type);
+				return;
+			}
+			case CELL_GCM_ZCULL_STATS:
+			case CELL_GCM_ZCULL_STATS1:
+			case CELL_GCM_ZCULL_STATS2:
+			case CELL_GCM_ZCULL_STATS3:
+			{
+				//TODO
+				value = (type != CELL_GCM_ZCULL_STATS3)? UINT16_MAX : 0;
+				break;
+			}
+			default:
+				LOG_ERROR(RSX, "Unknown zcull stat type %d", type);
+				break;
+			}
+		}
+
+		vm::ptr<CellGcmReportData> result = sink;
+		result->value = value;
+		result->padding = 0;
+		result->timer = timestamp();
+	}
+
+	void thread::sync()
+	{
+		zcull_ctrl->sync(this);
 	}
 
 	void thread::notify_zcull_info_changed()
 	{
-		check_zcull_status(false, false);
+		check_zcull_status(false);
 	}
 
 	//Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself!
@@ -2356,4 +2291,385 @@ namespace rsx
 
 		return false;
 	}
 
+	namespace reports
+	{
+		void ZCULL_control::set_enabled(class ::rsx::thread* ptimer, bool state)
+		{
+			if (state != enabled)
+			{
+				enabled = state;
+
+				if (active && !enabled)
+					set_active(ptimer, false);
+			}
+		}
+
+		void ZCULL_control::set_active(class ::rsx::thread* ptimer, bool state)
+		{
+			if (state != active)
+			{
+				active = state;
+
+				if (state)
+				{
+					verify(HERE), enabled && m_current_task == nullptr;
+					allocate_new_query(ptimer);
+					begin_occlusion_query(m_current_task);
+				}
+				else
+				{
+					verify(HERE), m_current_task;
+					if (m_current_task->num_draws)
+					{
+						end_occlusion_query(m_current_task);
+						m_current_task->active = false;
+						m_current_task->pending = true;
+
+						m_pending_writes.push_back({});
+						m_pending_writes.back().query = m_current_task;
+					}
+					else
+					{
+						discard_occlusion_query(m_current_task);
+						m_current_task->active = false;
+					}
+
+					m_current_task = nullptr;
+				}
+			}
+		}
+
+		void ZCULL_control::read_report(::rsx::thread* ptimer, vm::addr_t sink, u32 type)
+		{
+			if (m_current_task)
+			{
+				m_current_task->owned = true;
+				end_occlusion_query(m_current_task);
+				m_pending_writes.push_back({});
+
+				m_current_task->active = false;
+				m_current_task->pending = true;
+				m_pending_writes.back().query = m_current_task;
+
+				allocate_new_query(ptimer);
+				begin_occlusion_query(m_current_task);
+			}
+			else
+			{
+				//Spam; send null query down the pipeline to copy the last result
+				//Might be used to capture a timestamp (verify)
+				m_pending_writes.push_back({});
+			}
+
+			auto forwarder = &m_pending_writes.back();
+			for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); It++)
+			{
+				if (!It->sink)
+				{
+					It->counter_tag = m_statistics_tag_id;
+					It->due_tsc = m_tsc + m_cycles_delay;
+					It->sink = sink;
+					It->type = type;
+
+					if (forwarder != &(*It))
+					{
+						//Not the last one in the chain, forward the writing operation to the last writer
+						It->forwarder = forwarder;
+						It->query->owned = true;
+					}
+
+					continue;
+				}
+
+				break;
+			}
+		}
+
+		void ZCULL_control::allocate_new_query(::rsx::thread* ptimer)
+		{
+			int retries = 0;
+			while (!Emu.IsStopped())
+			{
+				for (int n = 0; n < occlusion_query_count; ++n)
+				{
+					if (m_occlusion_query_data[n].pending || m_occlusion_query_data[n].active)
+						continue;
+
+					m_current_task = &m_occlusion_query_data[n];
+					m_current_task->num_draws = 0;
+					m_current_task->result = 0;
+					m_current_task->sync_timestamp = 0;
+					m_current_task->active = true;
+					m_current_task->owned = false;
+					return;
+				}
+
+				if (retries > 0)
+				{
+					LOG_ERROR(RSX, "ZCULL report queue is overflowing!!");
+					m_statistics_map[m_statistics_tag_id] = 1;
+
+					verify(HERE), m_pending_writes.front().sink == 0;
+					m_pending_writes.resize(0);
+
+					for (auto &query : m_occlusion_query_data)
+					{
+						discard_occlusion_query(&query);
+						query.pending = false;
+					}
+
+					m_current_task = &m_occlusion_query_data[0];
+					m_current_task->num_draws = 0;
+					m_current_task->result = 0;
+					m_current_task->sync_timestamp = 0;
+					m_current_task->active = true;
+					m_current_task->owned = false;
+					return;
+				}
+
+				//All slots are occupied, try to pop the earliest entry
+				m_tsc += max_zcull_cycles_delay;
+				update(ptimer);
+
+				retries++;
+			}
+		}
+
+		void ZCULL_control::clear()
+		{
+			if (!m_pending_writes.empty())
+			{
+				//Remove any dangling/unclaimed queries as the information is lost anyway
+				auto valid_size = m_pending_writes.size();
+				for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It)
+				{
+					if (!It->sink)
+					{
+						discard_occlusion_query(It->query);
+						It->query->pending = false;
+						valid_size--;
+						continue;
+					}
+
+					break;
+				}
+
+				m_pending_writes.resize(valid_size);
+			}
+
+			m_statistics_tag_id++;
+			m_statistics_map[m_statistics_tag_id] = 0;
+		}
+
+		void ZCULL_control::on_draw()
+		{
+			if (m_current_task)
+				m_current_task->num_draws++;
+
+			m_cycles_delay = max_zcull_cycles_delay;
+		}
+
+		void ZCULL_control::write(vm::addr_t sink, u32 timestamp, u32 value)
+		{
+			verify(HERE), sink;
+			vm::ptr<CellGcmReportData> out = sink;
+			out->value = value;
+			out->timer = timestamp;
+			out->padding = 0;
+		}
+
+		void ZCULL_control::sync(::rsx::thread* ptimer)
+		{
+			if (!m_pending_writes.empty())
+			{
+				u32 processed = 0;
+				const bool has_unclaimed = (m_pending_writes.back().sink == 0);
+
+				//Write all claimed reports unconditionally
+				for (auto &writer : m_pending_writes)
+				{
+					if (!writer.sink)
+						break;
+
+					auto query = writer.query;
+					u32 result = m_statistics_map[writer.counter_tag];
+
+					if (query)
+					{
+						verify(HERE), query->pending;
+
+						if (!result && query->num_draws)
+						{
+							get_occlusion_query_result(query);
+
+							if (query->result)
+							{
+								result += query->result;
+								m_statistics_map[writer.counter_tag] = result;
+							}
+						}
+						else
+						{
+							//Already have a hit, no need to retest
+							discard_occlusion_query(query);
+						}
+
+						query->pending = false;
+					}
+
+					if (!writer.forwarder)
+						//No other queries in the chain, write result
+						write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0);
+
+					processed++;
+				}
+
+				if (!has_unclaimed)
+				{
+					verify(HERE), processed == m_pending_writes.size();
+					m_pending_writes.resize(0);
+				}
+				else
+				{
+					auto remaining = m_pending_writes.size() - processed;
+					verify(HERE), remaining > 0;
+
+					if (remaining == 1)
+					{
+						m_pending_writes.front() = m_pending_writes.back();
+						m_pending_writes.resize(1);
+					}
+					else
+					{
+						std::move(m_pending_writes.begin() + processed, m_pending_writes.end(), m_pending_writes.begin());
+						m_pending_writes.resize(remaining);
+					}
+				}
+
+				//Delete all statistics caches but leave the current one
+				for (auto It = m_statistics_map.begin(); It != m_statistics_map.end(); )
+				{
+					if (It->first == m_statistics_tag_id)
+						++It;
+					else
+						It = m_statistics_map.erase(It);
+				}
+			}
+
+			//Critical, since its likely a WAIT_FOR_IDLE type has been processed, all results are considered available
+			m_cycles_delay = 2;
+		}
+
+		void ZCULL_control::update(::rsx::thread* ptimer)
+		{
+			m_tsc++;
+
+			if (m_pending_writes.empty())
+				return;
+
+			u32 stat_tag_to_remove = m_statistics_tag_id;
+			u32 processed = 0;
+			for (auto &writer : m_pending_writes)
+			{
+				if (!writer.sink)
+					break;
+
+				if (writer.counter_tag != stat_tag_to_remove &&
+					stat_tag_to_remove != m_statistics_tag_id)
+				{
+					//If the stat id is different from this stat id and the queue is advancing,
+					//its guaranteed that the previous tag has no remaining writes as the queue is ordered
+					m_statistics_map.erase(stat_tag_to_remove);
+					stat_tag_to_remove = m_statistics_tag_id;
+				}
+
+				auto query = writer.query;
+				u32 result = m_statistics_map[writer.counter_tag];
+
+				if (query)
+				{
+					verify(HERE), query->pending;
+
+					if (UNLIKELY(writer.due_tsc < m_tsc))
+					{
+						if (!result && query->num_draws)
+						{
+							get_occlusion_query_result(query);
+
+							if (query->result)
+							{
+								result += query->result;
+								m_statistics_map[writer.counter_tag] = result;
+							}
+						}
+						else
+						{
+							//No need to read this
+							discard_occlusion_query(query);
+						}
+					}
+					else
+					{
+						if (result || !query->num_draws)
+						{
+							//Not necessary to read the result anymore
+							discard_occlusion_query(query);
+						}
+						else
+						{
+							//Maybe we get lucky and results are ready
+							if (check_occlusion_query_status(query))
+							{
+								get_occlusion_query_result(query);
+								if (query->result)
+								{
+									result += query->result;
+									m_statistics_map[writer.counter_tag] = result;
+								}
+							}
+							else
+							{
+								//Too early; abort
+								break;
+							}
+						}
+					}
+
+					query->pending = false;
+				}
+
+				stat_tag_to_remove = writer.counter_tag;
+
+				//only zpass supported right now
+				if (!writer.forwarder)
+					//No other queries in the chain, write result
+					write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0);
+
+				processed++;
+			}
+
+			if (stat_tag_to_remove != m_statistics_tag_id)
+				m_statistics_map.erase(stat_tag_to_remove);
+
+			if (processed)
+			{
+				auto remaining = m_pending_writes.size() - processed;
+				if (remaining == 1)
+				{
+					m_pending_writes.front() = m_pending_writes.back();
+					m_pending_writes.resize(1);
+				}
+				else if (remaining)
+				{
+					std::move(m_pending_writes.begin() + processed, m_pending_writes.end(), m_pending_writes.begin());
+					m_pending_writes.resize(remaining);
+				}
+				else
+				{
+					m_pending_writes.resize(0);
+				}
+			}
+		}
+	}
 }
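ZCULL_control::sync() and update() above fold each retired query into a per-tag counter (m_statistics_map) and only ever write a zero/non-zero value to the guest-visible sink. The following reduced model, with invented names, just makes that accumulation step explicit; it is not the emulator's code.

#include <cstdint>
#include <unordered_map>
#include <vector>

// Each queued sink carries the tag of the counter block that was current when
// it was queued; results are summed per tag, and the value written back is
// only "zero or non-zero" (0 or UINT16_MAX), which is all the ZPASS report needs.
struct queued_sink
{
    uint32_t counter_tag;
    uint32_t query_result;   // samples passed for the query tied to this sink
    uint32_t sink_address;
};

inline void flush_sinks(const std::vector<queued_sink>& sinks,
                        std::unordered_map<uint32_t, uint32_t>& stats,
                        void (*write)(uint32_t address, uint32_t value))
{
    for (const auto& s : sinks)
    {
        uint32_t& total = stats[s.counter_tag];
        total += s.query_result;                   // accumulate per counter block
        write(s.sink_address, total ? 0xFFFF : 0); // UINT16_MAX on any hit
    }
}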
@@ -157,64 +157,91 @@ namespace rsx
 		std::array<attribute_buffer_placement, 16> attribute_placement;
 	};
 
-	struct zcull_statistics
-	{
-		u32 zpass_pixel_cnt;
-		u32 zcull_stats;
-		u32 zcull_stats1;
-		u32 zcull_stats2;
-		u32 zcull_stats3;
-
-		void clear()
-		{
-			zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0;
-		}
-	};
-
-	struct occlusion_query_info
-	{
-		u32 driver_handle;
-		u32 result;
-		u32 num_draws;
-		bool pending;
-		bool active;
-		u64 sync_timestamp;
-	};
-
-	struct occlusion_task
-	{
-		std::vector<occlusion_query_info*> task_stack;
-		occlusion_query_info* active_query = nullptr;
-		u32 pending = 0;
-
-		//Add one query to the task
-		void add(occlusion_query_info* query)
-		{
-			active_query = query;
-
-			if (task_stack.size() > 0 && pending == 0)
-				task_stack.resize(0);
-
-			const auto empty_slots = task_stack.size() - pending;
-			if (empty_slots >= 4)
-			{
-				for (auto &_query : task_stack)
-				{
-					if (_query == nullptr)
-					{
-						_query = query;
-						pending++;
-						return;
-					}
-				}
-			}
-
-			task_stack.push_back(query);
-			pending++;
-		}
-	};
+	namespace reports
+	{
+		struct occlusion_query_info
+		{
+			u32 driver_handle;
+			u32 result;
+			u32 num_draws;
+			bool pending;
+			bool active;
+			bool owned;
+
+			u64 sync_timestamp;
+			u64 external_flags;
+		};
+
+		struct queued_report_write
+		{
+			u32 type = CELL_GCM_ZPASS_PIXEL_CNT;
+			u32 counter_tag;
+			occlusion_query_info* query;
+			queued_report_write* forwarder;
+			vm::addr_t sink;
+
+			u32 due_tsc;
+		};
+
+		struct ZCULL_control
+		{
+			//Delay in 'cycles' before a report update operation is forced to retire
+			//Larger values might give more performance but some engines (UE3) dont seem to wait for results and will flicker
+			//TODO: Determine the real max delay in real hardware
+			const u32 max_zcull_cycles_delay = 10;
+
+			//Number of occlusion query slots available. Real hardware actually has far fewer units before choking
+			const u32 occlusion_query_count = 128;
+
+			bool active = false;
+			bool enabled = false;
+
+			std::array<occlusion_query_info, 128> m_occlusion_query_data = {};
+
+			occlusion_query_info* m_current_task = nullptr;
+			u32 m_statistics_tag_id = 0;
+			u32 m_tsc = 0;
+			u32 m_cycles_delay = 10;
+
+			std::vector<queued_report_write> m_pending_writes;
+			std::unordered_map<u32, u32> m_statistics_map;
+
+			ZCULL_control() {}
+			~ZCULL_control() {}
+
+			void set_enabled(class ::rsx::thread* ptimer, bool enabled);
+			void set_active(class ::rsx::thread* ptimer, bool active);
+
+			void write(vm::addr_t sink, u32 timestamp, u32 value);
+
+			//Read current zcull statistics into the address provided
+			void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);
+
+			//Sets up a new query slot and sets it to the current task
+			void allocate_new_query(class ::rsx::thread* ptimer);
+
+			//clears current stat block and increments stat_tag_id
+			void clear();
+
+			//forcefully flushes all
+			void sync(class ::rsx::thread* ptimer);
+
+			//call once every 'tick' to update
+			void update(class ::rsx::thread* ptimer);
+
+			//Draw call notification
+			void on_draw();
+
+			//Check for pending writes
+			bool has_pending() const { return (m_pending_writes.size() != 0); }
+
+			//Backend methods (optional, will return everything as always visible by default)
+			virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
+			virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
+			virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; }
+			virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; }
+			virtual void discard_occlusion_query(occlusion_query_info* /*query*/) {}
+		};
+	}
 
 	struct sampled_image_descriptor_base;
 
@@ -236,11 +263,7 @@ namespace rsx
 
 		//occlusion query
 		bool zcull_surface_active = false;
-		zcull_statistics current_zcull_stats;
-
-		const u32 occlusion_query_count = 128;
-		std::array<occlusion_query_info, 128> occlusion_query_data = {};
-		occlusion_task zcull_task_queue = {};
+		std::unique_ptr<reports::ZCULL_control> zcull_ctrl;
 
 		//framebuffer setup
 		rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
@@ -382,17 +405,14 @@ namespace rsx
 		virtual void notify_tile_unbound(u32 /*tile*/) {}
 
 		//zcull
-		virtual void notify_zcull_info_changed();
-		virtual void clear_zcull_stats(u32 type);
-		virtual u32 get_zcull_stats(u32 type);
-		virtual void check_zcull_status(bool framebuffer_swap, bool force_read);
-		virtual u32 synchronize_zcull_stats(bool hard_sync = false);
-
-		virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
-		virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
-		virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; }
-		virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; }
+		void notify_zcull_info_changed();
+		void clear_zcull_stats(u32 type);
+		void check_zcull_status(bool framebuffer_swap);
+		void get_zcull_stats(u32 type, vm::addr_t sink);
+
+		//sync
+		void sync();
 
 		gsl::span<const gsl::byte> get_raw_index_array(const std::vector<std::pair<u32, u32> >& draw_indexed_clause) const;
 		gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
@@ -433,7 +453,7 @@ namespace rsx
 		void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);
 
 	private:
-		std::mutex m_mtx_task;
+		shared_mutex m_mtx_task;
 
 		struct internal_task_entry
 		{
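The header above turns ZCULL_control into a small polymorphic unit: the base struct owns scheduling and report write-back, and a renderer only overrides the raw query hooks; the defaults report everything as visible, which is what the NULL object created in rsx::thread relies on when a backend provides no implementation. Below is a stripped-down sketch of that arrangement using stand-in type names, not the actual classes.

#include <cstdint>

struct occlusion_query_stub
{
    uint32_t result = 0;
    uint32_t num_draws = 0;
};

// Base controller: virtual hooks with "always visible" defaults.
struct zcull_backend_base
{
    virtual ~zcull_backend_base() = default;
    virtual void begin_occlusion_query(occlusion_query_stub*) {}
    virtual void end_occlusion_query(occlusion_query_stub*) {}
    virtual bool check_occlusion_query_status(occlusion_query_stub*) { return true; }
    virtual void get_occlusion_query_result(occlusion_query_stub* q) { q->result = UINT32_MAX; }
    virtual void discard_occlusion_query(occlusion_query_stub*) {}
};

// A hypothetical backend supplies only the driver-specific pieces.
struct dummy_backend : zcull_backend_base
{
    void get_occlusion_query_result(occlusion_query_stub* q) override
    {
        q->result = q->num_draws ? 1 : 0; // pretend something passed if anything was drawn
    }
};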
@@ -566,7 +566,7 @@ VKGSRender::VKGSRender() : GSRender()
 	//Occlusion
 	m_occlusion_query_pool.create((*m_device), DESCRIPTOR_MAX_DRAW_CALLS); //Enough for 4k draw calls per pass
 	for (int n = 0; n < 128; ++n)
-		occlusion_query_data[n].driver_handle = n;
+		m_occlusion_query_data[n].driver_handle = n;
 
 	//Generate frame contexts
 	VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS };
@@ -769,7 +769,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 	{
 		vk::texture_cache::thrashed_set result;
 		{
-			std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+			std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
 			result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()));
 		}
 
@@ -777,7 +777,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 			return false;
 
 		{
-			std::lock_guard<std::mutex> lock(m_sampler_mutex);
+			std::lock_guard<shared_mutex> lock(m_sampler_mutex);
 			m_samplers_dirty.store(true);
 		}
 
@@ -795,7 +795,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 			//Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
 			vm::temporary_unlock();
 
-			std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
+			std::lock_guard<shared_mutex> lock(m_flush_queue_mutex);
 
 			m_flush_requests.post(sync_timestamp == 0ull);
 			has_queue_ref = true;
@@ -846,13 +846,13 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 
 void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size)
 {
-	std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+	std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
 	if (m_texture_cache.invalidate_range(address_base, size, true, true, false,
 		m_secondary_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()).violation_handled)
 	{
 		m_texture_cache.purge_dirty();
 		{
-			std::lock_guard<std::mutex> lock(m_sampler_mutex);
+			std::lock_guard<shared_mutex> lock(m_sampler_mutex);
 			m_samplers_dirty.store(true);
 		}
 	}
@@ -866,7 +866,7 @@ void VKGSRender::notify_tile_unbound(u32 tile)
 	//m_rtts.invalidate_surface_address(addr, false);
 
 	{
-		std::lock_guard<std::mutex> lock(m_sampler_mutex);
+		std::lock_guard<shared_mutex> lock(m_sampler_mutex);
 		m_samplers_dirty.store(true);
 	}
 }
@@ -903,6 +903,7 @@ void VKGSRender::check_heap_status()
 		m_attrib_ring_info.reset_allocation_stats();
 		m_texture_upload_buffer_ring_info.reset_allocation_stats();
+		m_current_frame->reset_heap_ptrs();
 		m_last_heap_sync_time = get_system_time();
 	}
 	else
 	{
@@ -1063,7 +1064,7 @@ void VKGSRender::end()
 	std::chrono::time_point<steady_clock> textures_start = vertex_end;
 	//Load textures
 	{
-		std::lock_guard<std::mutex> lock(m_sampler_mutex);
+		std::lock_guard<shared_mutex> lock(m_sampler_mutex);
 		bool update_framebuffer_sourced = false;
 
 		if (surface_store_tag != m_rtts.cache_tag)
@@ -1356,40 +1357,15 @@ void VKGSRender::end()
 		occlusion_id = m_occlusion_query_pool.find_free_slot();
 		if (occlusion_id == UINT32_MAX)
 		{
-			bool free_slot_found = false;
-			u32 index_to_free = UINT32_MAX;
-			u64 earliest_timestamp = UINT64_MAX;
-
-			//flush occlusion queries
-			for (auto It : m_occlusion_map)
-			{
-				u32 index = It.first;
-				auto query = &occlusion_query_data[index];
-				if (check_occlusion_query_status(query))
-				{
-					free_slot_found = true;
-					get_occlusion_query_result(query);
-					break;
-				}
-
-				if (query->sync_timestamp < earliest_timestamp)
-				{
-					index_to_free = index;
-					earliest_timestamp = query->sync_timestamp;
-				}
-			}
-
-			if (free_slot_found)
-			{
-				occlusion_id = m_occlusion_query_pool.find_free_slot();
-			}
-			else
-			{
-				get_occlusion_query_result(&occlusion_query_data[index_to_free]);
-				occlusion_id = m_occlusion_query_pool.find_free_slot();
-			}
-
-			verify(HERE), occlusion_id != UINT32_MAX;
+			m_tsc += 100;
+			update(this);
+
+			occlusion_id = m_occlusion_query_pool.find_free_slot();
+			if (occlusion_id == UINT32_MAX)
+			{
+				LOG_ERROR(RSX, "Occlusion pool overflow");
+				if (m_current_task) m_current_task->result = 1;
+			}
 		}
 	}
 
@@ -1441,7 +1417,7 @@ void VKGSRender::end()
 	const bool is_emulated_restart = (!primitive_emulated && rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart() && rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed);
 	const bool single_draw = !supports_multidraw || (!is_emulated_restart && (rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive));
 
-	if (m_occlusion_query_active)
+	if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
 	{
 		//Begin query
 		m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id);
@@ -1500,7 +1476,7 @@ void VKGSRender::end()
 		}
 	}
 
-	if (m_occlusion_query_active)
+	if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
 	{
 		//End query
 		m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
@@ -1565,6 +1541,7 @@ void VKGSRender::on_init_thread()
 
 	GSRender::on_init_thread();
 	rsx_thread = std::this_thread::get_id();
+	zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this));
 
 	if (!supports_native_ui)
 	{
@@ -1627,6 +1604,7 @@ void VKGSRender::on_init_thread()
 
 void VKGSRender::on_exit()
 {
+	zcull_ctrl.release();
 	return GSRender::on_exit();
 }
 
@@ -2002,7 +1980,7 @@ void VKGSRender::do_local_task(bool /*idle*/)
 {
 	if (m_flush_requests.pending())
 	{
-		std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
+		std::lock_guard<shared_mutex> lock(m_flush_queue_mutex);
 
 		//TODO: Determine if a hard sync is necessary
 		//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
@@ -2610,8 +2588,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 		}
 	}
 
-	if ((window_clip_width && window_clip_width != clip_width) ||
-		(window_clip_height && window_clip_height != clip_height))
+	if ((window_clip_width && window_clip_width < clip_width) ||
+		(window_clip_height && window_clip_height < clip_height))
 	{
 		LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d",
 			window_clip_width, window_clip_height, clip_width, clip_height);
@@ -2818,7 +2796,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 		m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, fbo_width, fbo_height, std::move(fbo_images)));
 	}
 
-	check_zcull_status(true, false);
+	check_zcull_status(true);
 }
 
 void VKGSRender::reinitialize_swapchain()
@@ -3194,7 +3172,7 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
 	check_heap_status();
 
 	//Stop all parallel operations until this is finished
-	std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+	std::lock_guard<shared_mutex> lock(m_secondary_cb_guard);
 
 	auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer);
 	m_current_command_buffer->begin();
@@ -3240,31 +3218,32 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
 	return false;
 }
 
-void VKGSRender::clear_zcull_stats(u32 type)
-{
-	rsx::thread::clear_zcull_stats(type);
-	m_occlusion_map.clear();
-	m_occlusion_query_pool.reset_all(*m_current_command_buffer);
-}
-
-void VKGSRender::begin_occlusion_query(rsx::occlusion_query_info* query)
+void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
 {
 	query->result = 0;
-	query->sync_timestamp = get_system_time();
+	//query->sync_timestamp = get_system_time();
 	m_active_query_info = query;
 	m_occlusion_query_active = true;
 }
 
-void VKGSRender::end_occlusion_query(rsx::occlusion_query_info* query)
+void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
 {
 	m_occlusion_query_active = false;
 	m_active_query_info = nullptr;
 
-	flush_command_queue();
+	//Avoid stalling later if this query is already tied to a report
+	if (query->num_draws && query->owned && !m_flush_requests.pending())
+	{
+		m_flush_requests.post(false);
+		m_flush_requests.remove_one();
+	}
 }
 
-bool VKGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query)
+bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
 {
+	if (!query->num_draws)
+		return true;
+
 	auto found = m_occlusion_map.find(query->driver_handle);
 	if (found == m_occlusion_map.end())
 		return true;
@@ -3274,16 +3253,26 @@ bool VKGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query)
 		return true;
 
 	if (data.command_buffer_to_wait == m_current_command_buffer)
+	{
+		if (!m_flush_requests.pending())
+		{
+			//Likely to be read at some point in the near future, submit now to avoid stalling later
+			m_flush_requests.post(false);
+			m_flush_requests.remove_one();
+		}
+
 		return false;
+	}
 
 	if (data.command_buffer_to_wait->pending)
		//Don't bother poking the state, a flush later will likely do it for free
 		return false;
 
 	u32 oldest = data.indices.front();
 	return m_occlusion_query_pool.check_query_status(oldest);
 }
 
-void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
+void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
 {
 	auto found = m_occlusion_map.find(query->driver_handle);
 	if (found == m_occlusion_map.end())
@@ -3293,20 +3282,32 @@ void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
 	if (data.indices.size() == 0)
 		return;
 
-	if (data.command_buffer_to_wait == m_current_command_buffer)
-		flush_command_queue(); //Should hard sync, but this should almost never ever happen
-
-	if (data.command_buffer_to_wait->pending)
-		data.command_buffer_to_wait->wait();
-
-	//Gather data
-	for (const auto occlusion_id : data.indices)
-	{
-		//We only need one hit
-		if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id))
-		{
-			query->result = 1;
-			break;
-		}
-	}
+	if (query->num_draws)
+	{
+		if (data.command_buffer_to_wait == m_current_command_buffer)
+		{
+			flush_command_queue();
+
+			//Clear any deferred flush requests from previous call to get_query_status()
+			if (m_flush_requests.pending())
+			{
+				m_flush_requests.clear_pending_flag();
+				m_flush_requests.consumer_wait();
+			}
+		}
+
+		if (data.command_buffer_to_wait->pending)
+			data.command_buffer_to_wait->wait();
+
+		//Gather data
+		for (const auto occlusion_id : data.indices)
+		{
+			//We only need one hit
+			if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id))
+			{
+				query->result = 1;
+				break;
+			}
+		}
+	}
 
@@ -3314,6 +3315,26 @@ void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query)
 	m_occlusion_map.erase(query->driver_handle);
 }
 
+void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
+{
+	if (m_active_query_info == query)
+	{
+		m_occlusion_query_active = false;
+		m_active_query_info = nullptr;
+	}
+
+	auto found = m_occlusion_map.find(query->driver_handle);
+	if (found == m_occlusion_map.end())
+		return;
+
+	auto &data = found->second;
+	if (data.indices.size() == 0)
+		return;
+
+	m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices);
+	m_occlusion_map.erase(query->driver_handle);
+}
+
 void VKGSRender::shell_do_cleanup()
 {
 	//TODO: Guard this
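Rather than calling flush_command_queue() from the query paths, the Vulkan changes above post a flush request that the owning thread services later. Below is a minimal, hypothetical version of that producer/consumer handshake; the real flush_request_task in VKGSRender.h carries more state (reference counting, wait helpers), so treat this as a sketch only.

#include <atomic>

struct flush_request
{
    std::atomic<bool> pending{ false };

    // Producer: called from code that notices a readback will be needed soon.
    void post() { pending.store(true, std::memory_order_release); }

    // Consumer: called from the thread that owns the command buffer.
    template <typename SubmitFn>
    void service(SubmitFn&& submit)
    {
        if (pending.exchange(false, std::memory_order_acq_rel))
            submit(); // submit/flush the command buffer once per posted request
    }
};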
@@ -38,7 +38,7 @@ namespace vk
 
//Heap allocation sizes in MB
//NOTE: Texture uploads can be huge, upto 16MB for a single texture (4096x4096px)
-#define VK_ATTRIB_RING_BUFFER_SIZE_M 256
+#define VK_ATTRIB_RING_BUFFER_SIZE_M 384
 #define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
 #define VK_UBO_RING_BUFFER_SIZE_M 128
 #define VK_INDEX_RING_BUFFER_SIZE_M 64
@@ -55,7 +55,7 @@ struct command_buffer_chunk: public vk::command_buffer
 
 	std::atomic_bool pending = { false };
 	std::atomic<u64> last_sync = { 0 };
-	std::mutex guard_mutex;
+	shared_mutex guard_mutex;
 
 	command_buffer_chunk()
 	{}
@@ -97,7 +97,7 @@ struct command_buffer_chunk: public vk::command_buffer
 	{
 		if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
 		{
-			std::lock_guard<std::mutex> lock(guard_mutex);
+			std::lock_guard<shared_mutex> lock(guard_mutex);
 
 			if (pending)
 			{
@@ -111,7 +111,7 @@ struct command_buffer_chunk: public vk::command_buffer
 
 	void wait()
 	{
-		std::lock_guard<std::mutex> lock(guard_mutex);
+		std::lock_guard<shared_mutex> lock(guard_mutex);
 
 		if (!pending)
 			return;
@@ -244,7 +244,7 @@ struct flush_request_task
 	}
 };
 
-class VKGSRender : public GSRender
+class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
 {
 private:
 	VKFragmentProgram m_fragment_prog;
@@ -265,7 +265,7 @@ private:
 	std::unique_ptr<vk::depth_scaling_pass> m_depth_scaler;
 	std::unique_ptr<vk::ui_overlay_renderer> m_ui_renderer;
 
-	std::mutex m_sampler_mutex;
+	shared_mutex m_sampler_mutex;
 	u64 surface_store_tag = 0;
 	std::atomic_bool m_samplers_dirty = { true };
 	std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
@@ -292,10 +292,10 @@ private:
 	vk::command_pool m_command_buffer_pool;
 	vk::occlusion_query_pool m_occlusion_query_pool;
 	bool m_occlusion_query_active = false;
-	rsx::occlusion_query_info *m_active_query_info = nullptr;
+	rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
 	std::unordered_map<u32, occlusion_data> m_occlusion_map;
 
-	std::mutex m_secondary_cb_guard;
+	shared_mutex m_secondary_cb_guard;
 	vk::command_pool m_secondary_command_buffer_pool;
 	vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations
 
@@ -346,7 +346,7 @@ private:
 	bool m_flush_draw_buffers = false;
 	std::atomic<int> m_last_flushable_cb = {-1 };
 
-	std::mutex m_flush_queue_mutex;
+	shared_mutex m_flush_queue_mutex;
 	flush_request_task m_flush_requests;
 
 	std::thread::id rsx_thread;
@@ -400,11 +400,11 @@ public:
 	void write_buffers();
 	void set_viewport();
 
-	void clear_zcull_stats(u32 type) override;
-	void begin_occlusion_query(rsx::occlusion_query_info* query) override;
-	void end_occlusion_query(rsx::occlusion_query_info* query) override;
-	bool check_occlusion_query_status(rsx::occlusion_query_info* query) override;
-	void get_occlusion_query_result(rsx::occlusion_query_info* query) override;
+	void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
+	void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
+	bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
+	void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
+	void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;
 
 protected:
 	void begin() override;
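The header above also bumps VK_ATTRIB_RING_BUFFER_SIZE_M from 256 to 384 MB, and check_heap_status() now resets the frame's heap pointers when the watermark trips instead of failing allocations outright. A toy ring-heap sketch (names invented) of what that reset amounts to:

#include <cstddef>
#include <stdexcept>

class ring_heap
{
    std::size_t m_size;
    std::size_t m_put = 0; // current allocation offset

public:
    explicit ring_heap(std::size_t size_in_mb) : m_size(size_in_mb * 0x100000) {}

    std::size_t alloc(std::size_t bytes)
    {
        if (m_put + bytes > m_size)
            throw std::runtime_error("heap overflow - caller must sync and reset");
        const std::size_t offset = m_put;
        m_put += bytes;
        return offset;
    }

    // Comparable in spirit to reset_allocation_stats()/reset_heap_ptrs():
    // once the GPU is known to be done with the old data, start over at 0.
    void reset() { m_put = 0; }
};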
@@ -1,7 +1,7 @@
 #include "stdafx.h"
 #include "VKHelpers.h"
 
-#include <mutex>
+#include "Utilities/mutex.h"
 
 namespace vk
 {
@@ -24,7 +24,7 @@ namespace vk
 	u64 g_num_total_frames = 0;
 
 	//global submit guard to prevent race condition on queue submit
-	std::mutex g_submit_mutex;
+	shared_mutex g_submit_mutex;
 
 	VKAPI_ATTR void* VKAPI_CALL mem_realloc(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
 	{
@@ -57,6 +57,7 @@ namespace rsx
 	{
 		void set_reference(thread* rsx, u32 _reg, u32 arg)
 		{
+			rsx->sync();
 			rsx->ctrl->ref.exchange(arg);
 		}
 
@@ -112,6 +113,7 @@ namespace rsx
 
 		void semaphore_release(thread* rsx, u32 _reg, u32 arg)
 		{
+			rsx->sync();
 			rsx->sync_point_request = true;
 			const u32 addr = get_address(method_registers.semaphore_offset_406e(), method_registers.semaphore_context_dma_406e());
 
@@ -164,6 +166,8 @@ namespace rsx
 			{
 				//
 			}
+
+			rsx->sync();
 			auto& sema = vm::_ref<RsxReports>(rsx->label_addr);
 			sema.semaphore[index].val = arg;
 			sema.semaphore[index].pad = 0;
@@ -177,8 +181,9 @@ namespace rsx
 			{
 				//
 			}
-			u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff);
 
+			rsx->sync();
+			u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff);
 			auto& sema = vm::_ref<RsxReports>(rsx->label_addr);
 			sema.semaphore[index].val = val;
 			sema.semaphore[index].pad = 0;
@@ -433,16 +438,14 @@ namespace rsx
 			case CELL_GCM_ZCULL_STATS1:
 			case CELL_GCM_ZCULL_STATS2:
 			case CELL_GCM_ZCULL_STATS3:
-				result->value = rsx->get_zcull_stats(type);
-				LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type);
+				rsx->get_zcull_stats(type, address_ptr);
 				break;
 			default:
 				LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type);
+				result->timer = rsx->timestamp();
+				result->padding = 0;
 				break;
 			}
-
-			result->timer = rsx->timestamp();
-			result->padding = 0;
 		}
 
 		void clear_report_value(thread* rsx, u32 _reg, u32 arg)
@@ -450,10 +453,7 @@ namespace rsx
 			switch (arg)
 			{
 			case CELL_GCM_ZPASS_PIXEL_CNT:
-				LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT");
-				break;
 			case CELL_GCM_ZCULL_STATS:
-				LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS");
 				break;
 			default:
 				LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg);
@@ -492,6 +492,7 @@ namespace rsx
 				return;
 			}
 
+			rsx->sync();
 			vm::ptr<CellGcmReportData> result = address_ptr;
 			rsx->conditional_render_test_failed = (result->value == 0);
 		}
@@ -514,6 +515,11 @@ namespace rsx
 			rsx->notify_zcull_info_changed();
 		}
 
+		void sync(thread* rsx, u32, u32)
+		{
+			rsx->sync();
+		}
+
 		void set_surface_dirty_bit(thread* rsx, u32, u32)
 		{
 			rsx->m_rtts_dirty = true;
@@ -1678,6 +1684,9 @@ namespace rsx
 		bind<NV4097_SET_STENCIL_TEST_ENABLE, nv4097::set_surface_options_dirty_bit>();
 		bind<NV4097_SET_DEPTH_MASK, nv4097::set_surface_options_dirty_bit>();
 		bind<NV4097_SET_COLOR_MASK, nv4097::set_surface_options_dirty_bit>();
+		bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
+		bind<NV4097_ZCULL_SYNC, nv4097::sync>();
+		bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
 
 		//NV308A
 		bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
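The rsx_methods changes bind a new nv4097::sync handler and call rsx->sync() before semaphore releases and report reads, so deferred ZCULL writes reach guest memory before the CPU can observe the semaphore. A tiny illustrative sketch of that ordering requirement, with invented names:

#include <cstdint>

struct report_unit
{
    virtual ~report_unit() = default;
    virtual void flush_pending_writes() = 0;
};

inline void release_semaphore(report_unit& zcull, volatile uint32_t* semaphore, uint32_t value)
{
    zcull.flush_pending_writes(); // same role as rsx->sync() in the handlers above
    *semaphore = value;           // only now perform the CPU-visible write
}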