From 315798b1f48cffeda8df6344f4f4617e9fd82ac7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 5 Mar 2018 14:09:43 +0300 Subject: [PATCH] rsx: ZCULL rewrite and other improvements - ZCULL unit emulation rewritten - ZCULL reports are now deferred avoiding pipeline stalls - Minor optimizations; replaced std::mutex with shared_mutex where contention is rare - Silence unnecessary error message - Small improvement to out of memory handling for vulkan and slightly bump vertex buffer heap --- rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp | 8 +- rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.h | 2 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 40 +- rpcs3/Emu/RSX/GL/GLGSRender.h | 14 +- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 6 +- rpcs3/Emu/RSX/RSXThread.cpp | 574 ++++++++++++++++----- rpcs3/Emu/RSX/RSXThread.h | 152 +++--- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 163 +++--- rpcs3/Emu/RSX/VK/VKGSRender.h | 28 +- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 4 +- rpcs3/Emu/RSX/rsx_methods.cpp | 27 +- 11 files changed, 697 insertions(+), 321 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp index 6c5567ec9a..92dce919e5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.cpp @@ -7,7 +7,7 @@ void data_cache::store_and_protect_data(u64 key, u32 start, size_t size, u8 format, size_t w, size_t h, size_t d, size_t m, ComPtr data) { - std::lock_guard lock(m_mut); + std::lock_guard lock(m_mut); m_address_to_data[key] = std::make_pair(texture_entry(format, w, h, d, m), data); protect_data(key, start, size); } @@ -25,7 +25,7 @@ void data_cache::protect_data(u64 key, u32 start, size_t size) bool data_cache::invalidate_address(u32 addr) { // In case 2 threads write to texture memory - std::lock_guard lock(m_mut); + std::lock_guard lock(m_mut); bool handled = false; auto It = m_protected_ranges.begin(), E = m_protected_ranges.end(); for (; It != E;) @@ -49,7 +49,7 @@ bool data_cache::invalidate_address(u32 addr) std::pair > *data_cache::find_data_if_available(u64 key) { - std::lock_guard lock(m_mut); + std::lock_guard lock(m_mut); auto It = m_address_to_data.find(key); if (It == m_address_to_data.end()) return nullptr; @@ -58,7 +58,7 @@ std::pair > *data_cache::find_data_if_avai void data_cache::unprotect_all() { - std::lock_guard lock(m_mut); + std::lock_guard lock(m_mut); for (auto &protectedTexture : m_protected_ranges) { u32 protectedRangeStart = std::get<1>(protectedTexture), protectedRangeSize = std::get<2>(protectedTexture); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.h b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.h index d9d58adc38..10c8b5c7ad 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12MemoryHelpers.h @@ -98,7 +98,7 @@ private: * Memory protection fault catch can be generated by any thread and * modifies it. */ - std::mutex m_mut; + shared_mutex m_mut; std::unordered_map> > m_address_to_data; // Storage std::list > m_protected_ranges; // address, start of protected range, size of protected range diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 20e7ee016d..909ad66bdf 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -209,7 +209,7 @@ void GLGSRender::end() { std::chrono::time_point textures_start = steady_clock::now(); - std::lock_guard lock(m_sampler_mutex); + std::lock_guard lock(m_sampler_mutex); void* unused = nullptr; bool update_framebuffer_sourced = false; @@ -598,6 +598,7 @@ void GLGSRender::set_viewport() void GLGSRender::on_init_thread() { GSRender::on_init_thread(); + zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this)); gl::init(); @@ -768,7 +769,7 @@ void GLGSRender::on_init_thread() for (u32 i = 0; i < occlusion_query_count; ++i) { GLuint handle = 0; - auto &query = occlusion_query_data[i]; + auto &query = m_occlusion_query_data[i]; glGenQueries(1, &handle); query.driver_handle = (u64)handle; @@ -853,6 +854,8 @@ void GLGSRender::on_init_thread() void GLGSRender::on_exit() { + zcull_ctrl.release(); + m_prog_buffer.clear(); if (draw_fbo) @@ -920,7 +923,7 @@ void GLGSRender::on_exit() for (u32 i = 0; i < occlusion_query_count; ++i) { - auto &query = occlusion_query_data[i]; + auto &query = m_occlusion_query_data[i]; query.active = false; query.pending = false; @@ -1424,7 +1427,7 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) return false; { - std::lock_guard lock(m_sampler_mutex); + std::lock_guard lock(m_sampler_mutex); m_samplers_dirty.store(true); } @@ -1452,7 +1455,7 @@ void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { m_gl_texture_cache.purge_dirty(); { - std::lock_guard lock(m_sampler_mutex); + std::lock_guard lock(m_sampler_mutex); m_samplers_dirty.store(true); } } @@ -1464,7 +1467,7 @@ void GLGSRender::do_local_task(bool /*idle*/) if (!work_queue.empty()) { - std::lock_guard lock(queue_guard); + std::lock_guard lock(queue_guard); work_queue.remove_if([](work_item &q) { return q.received; }); @@ -1505,7 +1508,7 @@ void GLGSRender::do_local_task(bool /*idle*/) work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data) { - std::lock_guard lock(queue_guard); + std::lock_guard lock(queue_guard); work_queue.emplace_back(); work_item &result = work_queue.back(); @@ -1537,31 +1540,38 @@ void GLGSRender::notify_tile_unbound(u32 tile) //m_rtts.invalidate_surface_address(addr, false); } -void GLGSRender::begin_occlusion_query(rsx::occlusion_query_info* query) +void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query) { query->result = 0; glBeginQuery(GL_ANY_SAMPLES_PASSED, (GLuint)query->driver_handle); } -void GLGSRender::end_occlusion_query(rsx::occlusion_query_info* query) +void GLGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query) { - glEndQuery(GL_ANY_SAMPLES_PASSED); + if (query->num_draws) + glEndQuery(GL_ANY_SAMPLES_PASSED); } -bool GLGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query) +bool GLGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query) { + if (!query->num_draws) + return true; + GLint status = GL_TRUE; glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT_AVAILABLE, &status); return status != GL_FALSE; } -void GLGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query) +void GLGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query) { - GLint result; - glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result); + if (query->num_draws) + { + GLint result; + glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result); - query->result += result; + query->result += result; + } } void GLGSRender::shell_do_cleanup() diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index e19a707942..c853d5fded 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -265,7 +265,7 @@ struct driver_state } }; -class GLGSRender : public GSRender +class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control { private: GLFragmentProgram m_fragment_prog; @@ -311,7 +311,7 @@ private: std::vector m_overlay_cleanup_requests; - std::mutex queue_guard; + shared_mutex queue_guard; std::list work_queue; bool flush_draw_buffers = false; @@ -327,7 +327,7 @@ private: //vaos are mandatory for core profile gl::vao m_vao; - std::mutex m_sampler_mutex; + shared_mutex m_sampler_mutex; u64 surface_store_tag = 0; std::atomic_bool m_samplers_dirty = {true}; std::array, rsx::limits::fragment_textures_count> fs_sampler_state = {}; @@ -363,10 +363,10 @@ public: bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override; - void begin_occlusion_query(rsx::occlusion_query_info* query) override; - void end_occlusion_query(rsx::occlusion_query_info* query) override; - bool check_occlusion_query_status(rsx::occlusion_query_info* query) override; - void get_occlusion_query_result(rsx::occlusion_query_info* query) override; + void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; + void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; + bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override; + void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override; protected: void begin() override; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index cc4230a46e..2c2b169e71 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -318,8 +318,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk } } - if ((window_clip_width && window_clip_width != clip_horizontal) || - (window_clip_height && window_clip_height != clip_vertical)) + if ((window_clip_width && window_clip_width < clip_horizontal) || + (window_clip_height && window_clip_height < clip_vertical)) { LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d", window_clip_width, window_clip_height, clip_horizontal, clip_vertical); @@ -428,7 +428,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk framebuffer_status_valid = draw_fbo.check(); if (!framebuffer_status_valid) return; - check_zcull_status(true, false); + check_zcull_status(true); set_viewport(); switch (rsx::method_registers.surface_color_target()) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 49cbb909e3..e5f8008569 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -349,8 +349,8 @@ namespace rsx element_push_buffer.resize(0); - if (zcull_task_queue.active_query && zcull_task_queue.active_query->active) - zcull_task_queue.active_query->num_draws++; + if (zcull_ctrl->active) + zcull_ctrl->on_draw(); if (capture_current_frame) { @@ -365,6 +365,12 @@ namespace rsx reset(); + if (!zcull_ctrl) + { + //Backend did not provide an implementation, provide NULL object + zcull_ctrl = std::make_unique<::rsx::reports::ZCULL_control>(); + } + last_flip_time = get_system_time() - 1000000; thread_ctrl::spawn(m_vblank_thread, "VBlank Thread", [this]() @@ -503,6 +509,9 @@ namespace rsx //Execute backend-local tasks first do_local_task(ctrl->put.load() == internal_get.load()); + //Update sub-units + zcull_ctrl->update(this); + //Set up restore state if needed if (sync_point_request) { @@ -1140,6 +1149,12 @@ namespace rsx void thread::do_internal_task() { + if (zcull_ctrl->has_pending()) + { + zcull_ctrl->sync(this); + return; + } + if (m_internal_tasks.empty()) { std::this_thread::yield(); @@ -1147,7 +1162,7 @@ namespace rsx else { fmt::throw_exception("Disabled" HERE); - //std::lock_guard lock{ m_mtx_task }; + //std::lock_guard lock{ m_mtx_task }; //internal_task_entry &front = m_internal_tasks.front(); @@ -1161,7 +1176,7 @@ namespace rsx //std::future thread::add_internal_task(std::function callback) //{ - // std::lock_guard lock{ m_mtx_task }; + // std::lock_guard lock{ m_mtx_task }; // m_internal_tasks.emplace_back(callback); // return m_internal_tasks.back().promise.get_future(); @@ -2075,10 +2090,20 @@ namespace rsx skip_frame = (m_skip_frame_ctr < 0); } + //Reset zcull ctrl + zcull_ctrl->set_active(this, false); + zcull_ctrl->clear(); + + if (zcull_ctrl->has_pending()) + { + LOG_ERROR(RSX, "Dangling reports found, discarding..."); + zcull_ctrl->sync(this); + } + performance_counters.sampled_frames++; } - void thread::check_zcull_status(bool framebuffer_swap, bool force_read) + void thread::check_zcull_status(bool framebuffer_swap) { if (g_cfg.video.disable_zcull_queries) return; @@ -2108,35 +2133,8 @@ namespace rsx } } - occlusion_query_info* query = nullptr; - - if (zcull_task_queue.task_stack.size() > 0) - query = zcull_task_queue.active_query; - - if (query && query->active) - { - if (force_read || (!zcull_rendering_enabled || !testing_enabled || !zcull_surface_active)) - { - end_occlusion_query(query); - query->active = false; - query->pending = true; - } - } - else - { - if (zcull_rendering_enabled && testing_enabled && zcull_surface_active) - { - //Find query - u32 free_index = synchronize_zcull_stats(); - query = &occlusion_query_data[free_index]; - zcull_task_queue.add(query); - - begin_occlusion_query(query); - query->active = true; - query->result = 0; - query->num_draws = 0; - } - } + zcull_ctrl->set_enabled(this, zcull_rendering_enabled); + zcull_ctrl->set_active(this, zcull_rendering_enabled && testing_enabled && zcull_surface_active); } void thread::clear_zcull_stats(u32 type) @@ -2144,113 +2142,50 @@ namespace rsx if (g_cfg.video.disable_zcull_queries) return; - if (type == CELL_GCM_ZPASS_PIXEL_CNT) - { - if (zcull_task_queue.active_query && - zcull_task_queue.active_query->active && - zcull_task_queue.active_query->num_draws > 0) - { - //discard active query results - check_zcull_status(false, true); - zcull_task_queue.active_query->pending = false; + zcull_ctrl->clear(); + } - //re-enable cull stats if stats are enabled - check_zcull_status(false, false); - zcull_task_queue.active_query->num_draws = 0; + void thread::get_zcull_stats(u32 type, vm::addr_t sink) + { + u32 value = 0; + if (!g_cfg.video.disable_zcull_queries) + { + switch (type) + { + case CELL_GCM_ZPASS_PIXEL_CNT: + { + zcull_ctrl->read_report(this, sink, type); + return; } - - current_zcull_stats.clear(); - } - } - - u32 thread::get_zcull_stats(u32 type) - { - if (g_cfg.video.disable_zcull_queries) - return 0u; - - if (zcull_task_queue.active_query && - zcull_task_queue.active_query->active && - current_zcull_stats.zpass_pixel_cnt == 0 && - type == CELL_GCM_ZPASS_PIXEL_CNT) - { - //The zcull unit is still bound as the read is happening and there are no results ready - check_zcull_status(false, true); //close current query - check_zcull_status(false, false); //start new query since stat counting is still active - } - - switch (type) - { - case CELL_GCM_ZPASS_PIXEL_CNT: - { - if (current_zcull_stats.zpass_pixel_cnt > 0) - return UINT16_MAX; - - synchronize_zcull_stats(true); - return (current_zcull_stats.zpass_pixel_cnt > 0) ? UINT16_MAX : 0; - } - case CELL_GCM_ZCULL_STATS: - case CELL_GCM_ZCULL_STATS1: - case CELL_GCM_ZCULL_STATS2: - //TODO - return UINT16_MAX; - case CELL_GCM_ZCULL_STATS3: - { - //Some kind of inverse value - if (current_zcull_stats.zpass_pixel_cnt > 0) - return 0; - - synchronize_zcull_stats(true); - return (current_zcull_stats.zpass_pixel_cnt > 0) ? 0 : UINT16_MAX; - } - default: - LOG_ERROR(RSX, "Unknown zcull stat type %d", type); - return 0; - } - } - - u32 thread::synchronize_zcull_stats(bool hard_sync) - { - if (!zcull_rendering_enabled || zcull_task_queue.pending == 0) - return 0; - - u32 result = UINT16_MAX; - - for (auto &query : zcull_task_queue.task_stack) - { - if (query == nullptr || query->active) - continue; - - bool status = check_occlusion_query_status(query); - if (status == false && !hard_sync) - continue; - - get_occlusion_query_result(query); - current_zcull_stats.zpass_pixel_cnt += query->result; - - query->pending = false; - query = nullptr; - zcull_task_queue.pending--; - } - - for (u32 i = 0; i < occlusion_query_count; ++i) - { - auto &query = occlusion_query_data[i]; - if (!query.pending && !query.active) + case CELL_GCM_ZCULL_STATS: + case CELL_GCM_ZCULL_STATS1: + case CELL_GCM_ZCULL_STATS2: + case CELL_GCM_ZCULL_STATS3: { - result = i; + //TODO + value = (type != CELL_GCM_ZCULL_STATS3)? UINT16_MAX : 0; + break; + } + default: + LOG_ERROR(RSX, "Unknown zcull stat type %d", type); break; } } - if (result == UINT16_MAX && !hard_sync) - return synchronize_zcull_stats(true); + vm::ptr result = sink; + result->value = value; + result->padding = 0; + result->timer = timestamp(); + } - return result; + void thread::sync() + { + zcull_ctrl->sync(this); } void thread::notify_zcull_info_changed() { - check_zcull_status(false, false); + check_zcull_status(false); } //Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself! @@ -2356,4 +2291,385 @@ namespace rsx return false; } + + namespace reports + { + void ZCULL_control::set_enabled(class ::rsx::thread* ptimer, bool state) + { + if (state != enabled) + { + enabled = state; + + if (active && !enabled) + set_active(ptimer, false); + } + } + + void ZCULL_control::set_active(class ::rsx::thread* ptimer, bool state) + { + if (state != active) + { + active = state; + + if (state) + { + verify(HERE), enabled && m_current_task == nullptr; + allocate_new_query(ptimer); + begin_occlusion_query(m_current_task); + } + else + { + verify(HERE), m_current_task; + if (m_current_task->num_draws) + { + end_occlusion_query(m_current_task); + m_current_task->active = false; + m_current_task->pending = true; + + m_pending_writes.push_back({}); + m_pending_writes.back().query = m_current_task; + } + else + { + discard_occlusion_query(m_current_task); + m_current_task->active = false; + } + + m_current_task = nullptr; + } + } + } + + void ZCULL_control::read_report(::rsx::thread* ptimer, vm::addr_t sink, u32 type) + { + if (m_current_task) + { + m_current_task->owned = true; + end_occlusion_query(m_current_task); + m_pending_writes.push_back({}); + + m_current_task->active = false; + m_current_task->pending = true; + m_pending_writes.back().query = m_current_task; + + allocate_new_query(ptimer); + begin_occlusion_query(m_current_task); + } + else + { + //Spam; send null query down the pipeline to copy the last result + //Might be used to capture a timestamp (verify) + m_pending_writes.push_back({}); + } + + auto forwarder = &m_pending_writes.back(); + for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); It++) + { + if (!It->sink) + { + It->counter_tag = m_statistics_tag_id; + It->due_tsc = m_tsc + m_cycles_delay; + It->sink = sink; + It->type = type; + + if (forwarder != &(*It)) + { + //Not the last one in the chain, forward the writing operation to the last writer + It->forwarder = forwarder; + It->query->owned = true; + } + + continue; + } + + break; + } + } + + void ZCULL_control::allocate_new_query(::rsx::thread* ptimer) + { + int retries = 0; + while (!Emu.IsStopped()) + { + for (int n = 0; n < occlusion_query_count; ++n) + { + if (m_occlusion_query_data[n].pending || m_occlusion_query_data[n].active) + continue; + + m_current_task = &m_occlusion_query_data[n]; + m_current_task->num_draws = 0; + m_current_task->result = 0; + m_current_task->sync_timestamp = 0; + m_current_task->active = true; + m_current_task->owned = false; + return; + } + + if (retries > 0) + { + LOG_ERROR(RSX, "ZCULL report queue is overflowing!!"); + m_statistics_map[m_statistics_tag_id] = 1; + + verify(HERE), m_pending_writes.front().sink == 0; + m_pending_writes.resize(0); + + for (auto &query : m_occlusion_query_data) + { + discard_occlusion_query(&query); + query.pending = false; + } + + m_current_task = &m_occlusion_query_data[0]; + m_current_task->num_draws = 0; + m_current_task->result = 0; + m_current_task->sync_timestamp = 0; + m_current_task->active = true; + m_current_task->owned = false; + return; + } + + //All slots are occupied, try to pop the earliest entry + m_tsc += max_zcull_cycles_delay; + update(ptimer); + + retries++; + } + } + + void ZCULL_control::clear() + { + if (!m_pending_writes.empty()) + { + //Remove any dangling/unclaimed queries as the information is lost anyway + auto valid_size = m_pending_writes.size(); + for (auto It = m_pending_writes.rbegin(); It != m_pending_writes.rend(); ++It) + { + if (!It->sink) + { + discard_occlusion_query(It->query); + It->query->pending = false; + valid_size--; + continue; + } + + break; + } + + m_pending_writes.resize(valid_size); + } + + m_statistics_tag_id++; + m_statistics_map[m_statistics_tag_id] = 0; + } + + void ZCULL_control::on_draw() + { + if (m_current_task) + m_current_task->num_draws++; + + m_cycles_delay = max_zcull_cycles_delay; + } + + void ZCULL_control::write(vm::addr_t sink, u32 timestamp, u32 value) + { + verify(HERE), sink; + vm::ptr out = sink; + out->value = value; + out->timer = timestamp; + out->padding = 0; + } + + void ZCULL_control::sync(::rsx::thread* ptimer) + { + if (!m_pending_writes.empty()) + { + u32 processed = 0; + const bool has_unclaimed = (m_pending_writes.back().sink == 0); + + //Write all claimed reports unconditionally + for (auto &writer : m_pending_writes) + { + if (!writer.sink) + break; + + auto query = writer.query; + u32 result = m_statistics_map[writer.counter_tag]; + + if (query) + { + verify(HERE), query->pending; + + if (!result && query->num_draws) + { + get_occlusion_query_result(query); + + if (query->result) + { + result += query->result; + m_statistics_map[writer.counter_tag] = result; + } + } + else + { + //Already have a hit, no need to retest + discard_occlusion_query(query); + } + + query->pending = false; + } + + if (!writer.forwarder) + //No other queries in the chain, write result + write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0); + + processed++; + } + + if (!has_unclaimed) + { + verify(HERE), processed == m_pending_writes.size(); + m_pending_writes.resize(0); + } + else + { + auto remaining = m_pending_writes.size() - processed; + verify(HERE), remaining > 0; + + if (remaining == 1) + { + m_pending_writes.front() = m_pending_writes.back(); + m_pending_writes.resize(1); + } + else + { + std::move(m_pending_writes.begin() + processed, m_pending_writes.end(), m_pending_writes.begin()); + m_pending_writes.resize(remaining); + } + } + + //Delete all statistics caches but leave the current one + for (auto It = m_statistics_map.begin(); It != m_statistics_map.end(); ) + { + if (It->first == m_statistics_tag_id) + ++It; + else + It = m_statistics_map.erase(It); + } + } + + //Critical, since its likely a WAIT_FOR_IDLE type has been processed, all results are considered available + m_cycles_delay = 2; + } + + void ZCULL_control::update(::rsx::thread* ptimer) + { + m_tsc++; + + if (m_pending_writes.empty()) + return; + + u32 stat_tag_to_remove = m_statistics_tag_id; + u32 processed = 0; + for (auto &writer : m_pending_writes) + { + if (!writer.sink) + break; + + if (writer.counter_tag != stat_tag_to_remove && + stat_tag_to_remove != m_statistics_tag_id) + { + //If the stat id is different from this stat id and the queue is advancing, + //its guaranteed that the previous tag has no remaining writes as the queue is ordered + m_statistics_map.erase(stat_tag_to_remove); + stat_tag_to_remove = m_statistics_tag_id; + } + + auto query = writer.query; + u32 result = m_statistics_map[writer.counter_tag]; + + if (query) + { + verify(HERE), query->pending; + + if (UNLIKELY(writer.due_tsc < m_tsc)) + { + if (!result && query->num_draws) + { + get_occlusion_query_result(query); + + if (query->result) + { + result += query->result; + m_statistics_map[writer.counter_tag] = result; + } + } + else + { + //No need to read this + discard_occlusion_query(query); + } + } + else + { + if (result || !query->num_draws) + { + //Not necessary to read the result anymore + discard_occlusion_query(query); + } + else + { + //Maybe we get lucky and results are ready + if (check_occlusion_query_status(query)) + { + get_occlusion_query_result(query); + if (query->result) + { + result += query->result; + m_statistics_map[writer.counter_tag] = result; + } + } + else + { + //Too early; abort + break; + } + } + } + + query->pending = false; + } + + stat_tag_to_remove = writer.counter_tag; + + //only zpass supported right now + if (!writer.forwarder) + //No other queries in the chain, write result + write(writer.sink, ptimer->timestamp(), result ? UINT16_MAX : 0); + + processed++; + } + + if (stat_tag_to_remove != m_statistics_tag_id) + m_statistics_map.erase(stat_tag_to_remove); + + if (processed) + { + auto remaining = m_pending_writes.size() - processed; + if (remaining == 1) + { + m_pending_writes.front() = m_pending_writes.back(); + m_pending_writes.resize(1); + } + else if (remaining) + { + std::move(m_pending_writes.begin() + processed, m_pending_writes.end(), m_pending_writes.begin()); + m_pending_writes.resize(remaining); + } + else + { + m_pending_writes.resize(0); + } + } + } + } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 0f0aded8ba..b82de85e06 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -157,64 +157,91 @@ namespace rsx std::array attribute_placement; }; - struct zcull_statistics + namespace reports { - u32 zpass_pixel_cnt; - u32 zcull_stats; - u32 zcull_stats1; - u32 zcull_stats2; - u32 zcull_stats3; - - void clear() + struct occlusion_query_info { - zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0; - } - }; + u32 driver_handle; + u32 result; + u32 num_draws; + bool pending; + bool active; + bool owned; - struct occlusion_query_info - { - u32 driver_handle; - u32 result; - u32 num_draws; - bool pending; - bool active; + u64 sync_timestamp; + }; - u64 sync_timestamp; - u64 external_flags; - }; - - struct occlusion_task - { - std::vector task_stack; - occlusion_query_info* active_query = nullptr; - u32 pending = 0; - - //Add one query to the task - void add(occlusion_query_info* query) + struct queued_report_write { - active_query = query; + u32 type = CELL_GCM_ZPASS_PIXEL_CNT; + u32 counter_tag; + occlusion_query_info* query; + queued_report_write* forwarder; + vm::addr_t sink; - if (task_stack.size() > 0 && pending == 0) - task_stack.resize(0); + u32 due_tsc; + }; - const auto empty_slots = task_stack.size() - pending; - if (empty_slots >= 4) - { - for (auto &_query : task_stack) - { - if (_query == nullptr) - { - _query = query; - pending++; - return; - } - } - } + struct ZCULL_control + { + //Delay in 'cycles' before a report update operation is forced to retire + //Larger values might give more performance but some engines (UE3) dont seem to wait for results and will flicker + //TODO: Determine the real max delay in real hardware + const u32 max_zcull_cycles_delay = 10; - task_stack.push_back(query); - pending++; - } - }; + //Number of occlusion query slots available. Real hardware actually has far fewer units before choking + const u32 occlusion_query_count = 128; + + bool active = false; + bool enabled = false; + + std::array m_occlusion_query_data = {}; + + occlusion_query_info* m_current_task = nullptr; + u32 m_statistics_tag_id = 0; + u32 m_tsc = 0; + u32 m_cycles_delay = 10; + + std::vector m_pending_writes; + std::unordered_map m_statistics_map; + + ZCULL_control() {} + ~ZCULL_control() {} + + void set_enabled(class ::rsx::thread* ptimer, bool enabled); + void set_active(class ::rsx::thread* ptimer, bool active); + + void write(vm::addr_t sink, u32 timestamp, u32 value); + + //Read current zcull statistics into the address provided + void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type); + + //Sets up a new query slot and sets it to the current task + void allocate_new_query(class ::rsx::thread* ptimer); + + //clears current stat block and increments stat_tag_id + void clear(); + + //forcefully flushes all + void sync(class ::rsx::thread* ptimer); + + //call once every 'tick' to update + void update(class ::rsx::thread* ptimer); + + //Draw call notification + void on_draw(); + + //Check for pending writes + bool has_pending() const { return (m_pending_writes.size() != 0); } + + //Backend methods (optional, will return everything as always visible by default) + virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {} + virtual void end_occlusion_query(occlusion_query_info* /*query*/) {} + virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; } + virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; } + virtual void discard_occlusion_query(occlusion_query_info* /*query*/) {} + }; + } struct sampled_image_descriptor_base; @@ -236,11 +263,7 @@ namespace rsx //occlusion query bool zcull_surface_active = false; - zcull_statistics current_zcull_stats; - - const u32 occlusion_query_count = 128; - std::array occlusion_query_data = {}; - occlusion_task zcull_task_queue = {}; + std::unique_ptr zcull_ctrl; //framebuffer setup rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count]; @@ -382,17 +405,14 @@ namespace rsx virtual void notify_tile_unbound(u32 /*tile*/) {} //zcull - virtual void notify_zcull_info_changed(); - virtual void clear_zcull_stats(u32 type); - virtual u32 get_zcull_stats(u32 type); - virtual void check_zcull_status(bool framebuffer_swap, bool force_read); - virtual u32 synchronize_zcull_stats(bool hard_sync = false); - - virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {} - virtual void end_occlusion_query(occlusion_query_info* /*query*/) {} - virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; } - virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; } + void notify_zcull_info_changed(); + void clear_zcull_stats(u32 type); + void check_zcull_status(bool framebuffer_swap); + void get_zcull_stats(u32 type, vm::addr_t sink); + //sync + void sync(); + gsl::span get_raw_index_array(const std::vector >& draw_indexed_clause) const; gsl::span get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector>& vertex_ranges) const; @@ -433,7 +453,7 @@ namespace rsx void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data); private: - std::mutex m_mtx_task; + shared_mutex m_mtx_task; struct internal_task_entry { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b4e6ec66ba..acc3466a32 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -566,7 +566,7 @@ VKGSRender::VKGSRender() : GSRender() //Occlusion m_occlusion_query_pool.create((*m_device), DESCRIPTOR_MAX_DRAW_CALLS); //Enough for 4k draw calls per pass for (int n = 0; n < 128; ++n) - occlusion_query_data[n].driver_handle = n; + m_occlusion_query_data[n].driver_handle = n; //Generate frame contexts VkDescriptorPoolSize uniform_buffer_pool = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS }; @@ -769,7 +769,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) { vk::texture_cache::thrashed_set result; { - std::lock_guard lock(m_secondary_cb_guard); + std::lock_guard lock(m_secondary_cb_guard); result = std::move(m_texture_cache.invalidate_address(address, is_writing, false, m_secondary_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue())); } @@ -777,7 +777,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) return false; { - std::lock_guard lock(m_sampler_mutex); + std::lock_guard lock(m_sampler_mutex); m_samplers_dirty.store(true); } @@ -795,7 +795,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) //Always submit primary cb to ensure state consistency (flush pending changes such as image transitions) vm::temporary_unlock(); - std::lock_guard lock(m_flush_queue_mutex); + std::lock_guard lock(m_flush_queue_mutex); m_flush_requests.post(sync_timestamp == 0ull); has_queue_ref = true; @@ -846,13 +846,13 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { - std::lock_guard lock(m_secondary_cb_guard); + std::lock_guard lock(m_secondary_cb_guard); if (m_texture_cache.invalidate_range(address_base, size, true, true, false, m_secondary_command_buffer, m_memory_type_mapping, m_swapchain->get_graphics_queue()).violation_handled) { m_texture_cache.purge_dirty(); { - std::lock_guard lock(m_sampler_mutex); + std::lock_guard lock(m_sampler_mutex); m_samplers_dirty.store(true); } } @@ -866,7 +866,7 @@ void VKGSRender::notify_tile_unbound(u32 tile) //m_rtts.invalidate_surface_address(addr, false); { - std::lock_guard lock(m_sampler_mutex); + std::lock_guard lock(m_sampler_mutex); m_samplers_dirty.store(true); } } @@ -903,6 +903,7 @@ void VKGSRender::check_heap_status() m_attrib_ring_info.reset_allocation_stats(); m_texture_upload_buffer_ring_info.reset_allocation_stats(); m_current_frame->reset_heap_ptrs(); + m_last_heap_sync_time = get_system_time(); } else { @@ -1063,7 +1064,7 @@ void VKGSRender::end() std::chrono::time_point textures_start = vertex_end; //Load textures { - std::lock_guard lock(m_sampler_mutex); + std::lock_guard lock(m_sampler_mutex); bool update_framebuffer_sourced = false; if (surface_store_tag != m_rtts.cache_tag) @@ -1356,40 +1357,15 @@ void VKGSRender::end() occlusion_id = m_occlusion_query_pool.find_free_slot(); if (occlusion_id == UINT32_MAX) { - bool free_slot_found = false; - u32 index_to_free = UINT32_MAX; - u64 earliest_timestamp = UINT64_MAX; + m_tsc += 100; + update(this); - //flush occlusion queries - for (auto It : m_occlusion_map) + occlusion_id = m_occlusion_query_pool.find_free_slot(); + if (occlusion_id == UINT32_MAX) { - u32 index = It.first; - auto query = &occlusion_query_data[index]; - if (check_occlusion_query_status(query)) - { - free_slot_found = true; - get_occlusion_query_result(query); - break; - } - - if (query->sync_timestamp < earliest_timestamp) - { - index_to_free = index; - earliest_timestamp = query->sync_timestamp; - } + LOG_ERROR(RSX, "Occlusion pool overflow"); + if (m_current_task) m_current_task->result = 1; } - - if (free_slot_found) - { - occlusion_id = m_occlusion_query_pool.find_free_slot(); - } - else - { - get_occlusion_query_result(&occlusion_query_data[index_to_free]); - occlusion_id = m_occlusion_query_pool.find_free_slot(); - } - - verify(HERE), occlusion_id != UINT32_MAX; } } @@ -1441,7 +1417,7 @@ void VKGSRender::end() const bool is_emulated_restart = (!primitive_emulated && rsx::method_registers.restart_index_enabled() && vk::emulate_primitive_restart() && rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed); const bool single_draw = !supports_multidraw || (!is_emulated_restart && (rsx::method_registers.current_draw_clause.first_count_commands.size() <= 1 || rsx::method_registers.current_draw_clause.is_disjoint_primitive)); - if (m_occlusion_query_active) + if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) { //Begin query m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); @@ -1500,7 +1476,7 @@ void VKGSRender::end() } } - if (m_occlusion_query_active) + if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) { //End query m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id); @@ -1565,6 +1541,7 @@ void VKGSRender::on_init_thread() GSRender::on_init_thread(); rsx_thread = std::this_thread::get_id(); + zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this)); if (!supports_native_ui) { @@ -1627,6 +1604,7 @@ void VKGSRender::on_init_thread() void VKGSRender::on_exit() { + zcull_ctrl.release(); return GSRender::on_exit(); } @@ -2002,7 +1980,7 @@ void VKGSRender::do_local_task(bool /*idle*/) { if (m_flush_requests.pending()) { - std::lock_guard lock(m_flush_queue_mutex); + std::lock_guard lock(m_flush_queue_mutex); //TODO: Determine if a hard sync is necessary //Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline @@ -2610,8 +2588,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) } } - if ((window_clip_width && window_clip_width != clip_width) || - (window_clip_height && window_clip_height != clip_height)) + if ((window_clip_width && window_clip_width < clip_width) || + (window_clip_height && window_clip_height < clip_height)) { LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d", window_clip_width, window_clip_height, clip_width, clip_height); @@ -2818,7 +2796,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, fbo_width, fbo_height, std::move(fbo_images))); } - check_zcull_status(true, false); + check_zcull_status(true); } void VKGSRender::reinitialize_swapchain() @@ -3194,7 +3172,7 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst check_heap_status(); //Stop all parallel operations until this is finished - std::lock_guard lock(m_secondary_cb_guard); + std::lock_guard lock(m_secondary_cb_guard); auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer); m_current_command_buffer->begin(); @@ -3240,31 +3218,32 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst return false; } -void VKGSRender::clear_zcull_stats(u32 type) -{ - rsx::thread::clear_zcull_stats(type); - m_occlusion_map.clear(); - m_occlusion_query_pool.reset_all(*m_current_command_buffer); -} - -void VKGSRender::begin_occlusion_query(rsx::occlusion_query_info* query) +void VKGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query) { query->result = 0; - query->sync_timestamp = get_system_time(); + //query->sync_timestamp = get_system_time(); m_active_query_info = query; m_occlusion_query_active = true; } -void VKGSRender::end_occlusion_query(rsx::occlusion_query_info* query) +void VKGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query) { m_occlusion_query_active = false; m_active_query_info = nullptr; - flush_command_queue(); + //Avoid stalling later if this query is already tied to a report + if (query->num_draws && query->owned && !m_flush_requests.pending()) + { + m_flush_requests.post(false); + m_flush_requests.remove_one(); + } } -bool VKGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query) +bool VKGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query) { + if (!query->num_draws) + return true; + auto found = m_occlusion_map.find(query->driver_handle); if (found == m_occlusion_map.end()) return true; @@ -3274,16 +3253,26 @@ bool VKGSRender::check_occlusion_query_status(rsx::occlusion_query_info* query) return true; if (data.command_buffer_to_wait == m_current_command_buffer) + { + if (!m_flush_requests.pending()) + { + //Likely to be read at some point in the near future, submit now to avoid stalling later + m_flush_requests.post(false); + m_flush_requests.remove_one(); + } + return false; + } if (data.command_buffer_to_wait->pending) + //Don't bother poking the state, a flush later will likely do it for free return false; u32 oldest = data.indices.front(); return m_occlusion_query_pool.check_query_status(oldest); } -void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query) +void VKGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query) { auto found = m_occlusion_map.find(query->driver_handle); if (found == m_occlusion_map.end()) @@ -3293,20 +3282,32 @@ void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query) if (data.indices.size() == 0) return; - if (data.command_buffer_to_wait == m_current_command_buffer) - flush_command_queue(); //Should hard sync, but this should almost never ever happen - - if (data.command_buffer_to_wait->pending) - data.command_buffer_to_wait->wait(); - - //Gather data - for (const auto occlusion_id : data.indices) + if (query->num_draws) { - //We only need one hit - if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id)) + if (data.command_buffer_to_wait == m_current_command_buffer) { - query->result = 1; - break; + flush_command_queue(); + + //Clear any deferred flush requests from previous call to get_query_status() + if (m_flush_requests.pending()) + { + m_flush_requests.clear_pending_flag(); + m_flush_requests.consumer_wait(); + } + } + + if (data.command_buffer_to_wait->pending) + data.command_buffer_to_wait->wait(); + + //Gather data + for (const auto occlusion_id : data.indices) + { + //We only need one hit + if (auto value = m_occlusion_query_pool.get_query_result(occlusion_id)) + { + query->result = 1; + break; + } } } @@ -3314,6 +3315,26 @@ void VKGSRender::get_occlusion_query_result(rsx::occlusion_query_info* query) m_occlusion_map.erase(query->driver_handle); } +void VKGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query) +{ + if (m_active_query_info == query) + { + m_occlusion_query_active = false; + m_active_query_info = nullptr; + } + + auto found = m_occlusion_map.find(query->driver_handle); + if (found == m_occlusion_map.end()) + return; + + auto &data = found->second; + if (data.indices.size() == 0) + return; + + m_occlusion_query_pool.reset_queries(*m_current_command_buffer, data.indices); + m_occlusion_map.erase(query->driver_handle); +} + void VKGSRender::shell_do_cleanup() { //TODO: Guard this diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 797896733f..2596b71345 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -38,7 +38,7 @@ namespace vk //Heap allocation sizes in MB //NOTE: Texture uploads can be huge, upto 16MB for a single texture (4096x4096px) -#define VK_ATTRIB_RING_BUFFER_SIZE_M 256 +#define VK_ATTRIB_RING_BUFFER_SIZE_M 384 #define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256 #define VK_UBO_RING_BUFFER_SIZE_M 128 #define VK_INDEX_RING_BUFFER_SIZE_M 64 @@ -55,7 +55,7 @@ struct command_buffer_chunk: public vk::command_buffer std::atomic_bool pending = { false }; std::atomic last_sync = { 0 }; - std::mutex guard_mutex; + shared_mutex guard_mutex; command_buffer_chunk() {} @@ -97,7 +97,7 @@ struct command_buffer_chunk: public vk::command_buffer { if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS) { - std::lock_guard lock(guard_mutex); + std::lock_guard lock(guard_mutex); if (pending) { @@ -111,7 +111,7 @@ struct command_buffer_chunk: public vk::command_buffer void wait() { - std::lock_guard lock(guard_mutex); + std::lock_guard lock(guard_mutex); if (!pending) return; @@ -244,7 +244,7 @@ struct flush_request_task } }; -class VKGSRender : public GSRender +class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control { private: VKFragmentProgram m_fragment_prog; @@ -265,7 +265,7 @@ private: std::unique_ptr m_depth_scaler; std::unique_ptr m_ui_renderer; - std::mutex m_sampler_mutex; + shared_mutex m_sampler_mutex; u64 surface_store_tag = 0; std::atomic_bool m_samplers_dirty = { true }; std::array, rsx::limits::fragment_textures_count> fs_sampler_state = {}; @@ -292,10 +292,10 @@ private: vk::command_pool m_command_buffer_pool; vk::occlusion_query_pool m_occlusion_query_pool; bool m_occlusion_query_active = false; - rsx::occlusion_query_info *m_active_query_info = nullptr; + rsx::reports::occlusion_query_info *m_active_query_info = nullptr; std::unordered_map m_occlusion_map; - std::mutex m_secondary_cb_guard; + shared_mutex m_secondary_cb_guard; vk::command_pool m_secondary_command_buffer_pool; vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations @@ -346,7 +346,7 @@ private: bool m_flush_draw_buffers = false; std::atomic m_last_flushable_cb = {-1 }; - std::mutex m_flush_queue_mutex; + shared_mutex m_flush_queue_mutex; flush_request_task m_flush_requests; std::thread::id rsx_thread; @@ -400,11 +400,11 @@ public: void write_buffers(); void set_viewport(); - void clear_zcull_stats(u32 type) override; - void begin_occlusion_query(rsx::occlusion_query_info* query) override; - void end_occlusion_query(rsx::occlusion_query_info* query) override; - bool check_occlusion_query_status(rsx::occlusion_query_info* query) override; - void get_occlusion_query_result(rsx::occlusion_query_info* query) override; + void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; + void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; + bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override; + void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override; + void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override; protected: void begin() override; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 4171af71ea..a424604fb7 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -1,7 +1,7 @@ #include "stdafx.h" #include "VKHelpers.h" -#include +#include "Utilities/mutex.h" namespace vk { @@ -24,7 +24,7 @@ namespace vk u64 g_num_total_frames = 0; //global submit guard to prevent race condition on queue submit - std::mutex g_submit_mutex; + shared_mutex g_submit_mutex; VKAPI_ATTR void* VKAPI_CALL mem_realloc(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) { diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index ac115cea5f..3dc556b911 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -57,6 +57,7 @@ namespace rsx { void set_reference(thread* rsx, u32 _reg, u32 arg) { + rsx->sync(); rsx->ctrl->ref.exchange(arg); } @@ -112,6 +113,7 @@ namespace rsx void semaphore_release(thread* rsx, u32 _reg, u32 arg) { + rsx->sync(); rsx->sync_point_request = true; const u32 addr = get_address(method_registers.semaphore_offset_406e(), method_registers.semaphore_context_dma_406e()); @@ -164,6 +166,8 @@ namespace rsx { // } + + rsx->sync(); auto& sema = vm::_ref(rsx->label_addr); sema.semaphore[index].val = arg; sema.semaphore[index].pad = 0; @@ -177,8 +181,9 @@ namespace rsx { // } - u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff); + rsx->sync(); + u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff); auto& sema = vm::_ref(rsx->label_addr); sema.semaphore[index].val = val; sema.semaphore[index].pad = 0; @@ -433,16 +438,14 @@ namespace rsx case CELL_GCM_ZCULL_STATS1: case CELL_GCM_ZCULL_STATS2: case CELL_GCM_ZCULL_STATS3: - result->value = rsx->get_zcull_stats(type); - LOG_WARNING(RSX, "NV4097_GET_REPORT: Unimplemented type %d", type); + rsx->get_zcull_stats(type, address_ptr); break; default: LOG_ERROR(RSX, "NV4097_GET_REPORT: Bad type %d", type); + result->timer = rsx->timestamp(); + result->padding = 0; break; } - - result->timer = rsx->timestamp(); - result->padding = 0; } void clear_report_value(thread* rsx, u32 _reg, u32 arg) @@ -450,10 +453,7 @@ namespace rsx switch (arg) { case CELL_GCM_ZPASS_PIXEL_CNT: - LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZPASS_PIXEL_CNT"); - break; case CELL_GCM_ZCULL_STATS: - LOG_WARNING(RSX, "TODO: NV4097_CLEAR_REPORT_VALUE: ZCULL_STATS"); break; default: LOG_ERROR(RSX, "NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg); @@ -492,6 +492,7 @@ namespace rsx return; } + rsx->sync(); vm::ptr result = address_ptr; rsx->conditional_render_test_failed = (result->value == 0); } @@ -514,6 +515,11 @@ namespace rsx rsx->notify_zcull_info_changed(); } + void sync(thread* rsx, u32, u32) + { + rsx->sync(); + } + void set_surface_dirty_bit(thread* rsx, u32, u32) { rsx->m_rtts_dirty = true; @@ -1678,6 +1684,9 @@ namespace rsx bind(); bind(); bind(); + bind(); + bind(); + bind(); //NV308A bind_range();