From 9ec23371927a87813518896b11632202d43fd5df Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 15 Jan 2018 22:28:25 +0300 Subject: [PATCH] rsx: Synchronization improvements - Always flush the primary queue and wait if not invoking readback from rsx thread -- Should fix some instances of device_lost when using WCB -- Marked remaining case as TODO -- TODO: optimize amount of time rsx waits for external threads trying to read --- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 96 ++++++----------- rpcs3/Emu/RSX/VK/VKGSRender.h | 181 +++++++++++++++++++++----------- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 4 +- 3 files changed, 154 insertions(+), 127 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8293750951..e86014be0c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -811,7 +811,13 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) if (!is_rsxthr) { + //Always submit primary cb to ensure state consistency (flush pending changes such as image transitions) vm::temporary_unlock(); + + std::lock_guard lock(m_flush_queue_mutex); + + m_flush_requests.post(sync_timestamp == 0ull); + has_queue_ref = true; } else { @@ -821,67 +827,36 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) if (sync_timestamp > 0) { - //Wait for any cb submitted after the sync timestamp to finish - while (true) + //Wait for earliest cb submitted after the sync timestamp to finish + command_buffer_chunk *target_cb = nullptr; + for (auto &cb : m_primary_cb_list) { - u32 pending = 0; - - if (m_last_flushable_cb < 0) - break; - - for (auto &cb : m_primary_cb_list) + if (cb.pending && cb.last_sync >= sync_timestamp) { - if (!cb.pending && cb.last_sync >= sync_timestamp) - { - pending = 0; - break; - } - - if (cb.pending) - { - pending++; - - if (is_rsxthr) - cb.poke(); - } + if (target_cb == nullptr || target_cb->last_sync > cb.last_sync) + target_cb = &cb; } - - if (!pending) - break; - - 
std::this_thread::yield(); } + if (target_cb) + target_cb->wait(); + if (is_rsxthr) m_last_flushable_cb = -1; } - else + + if (has_queue_ref) { - if (!is_rsxthr) - { - { - std::lock_guard lock(m_flush_queue_mutex); - - m_flush_commands = true; - m_queued_threads++; - } - - //Wait for the RSX thread to process - while (m_flush_commands) - { - _mm_lfence(); - _mm_pause(); - } - - has_queue_ref = true; - } + //Wait for the RSX thread to process request if it hasn't already + m_flush_requests.producer_wait(); } m_texture_cache.flush_all(result, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); if (has_queue_ref) { - m_queued_threads--; + //Release RSX thread + m_flush_requests.remove_one(); } } @@ -1855,7 +1830,7 @@ void VKGSRender::flush_command_queue(bool hard_sync) } m_last_flushable_cb = -1; - m_flush_commands = false; + m_flush_requests.clear_pending_flag(); } else { @@ -2037,15 +2012,7 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources) void VKGSRender::do_local_task(bool idle) { - //TODO: Guard this - if (m_overlay_cleanup_requests.size()) - { - flush_command_queue(true); - m_ui_renderer->remove_temp_resources(); - m_overlay_cleanup_requests.clear(); - } - - if (m_flush_commands) + if (m_flush_requests.pending()) { std::lock_guard lock(m_flush_queue_mutex); @@ -2053,12 +2020,8 @@ void VKGSRender::do_local_task(bool idle) //Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline flush_command_queue(); - m_flush_commands = false; - while (m_queued_threads) - { - _mm_lfence(); - _mm_pause(); - } + m_flush_requests.clear_pending_flag(); + m_flush_requests.consumer_wait(); } if (m_last_flushable_cb > -1) @@ -2151,7 +2114,14 @@ void VKGSRender::do_local_task(bool idle) #endif - if (m_custom_ui) + //TODO: Guard this + if (m_overlay_cleanup_requests.size()) + { + flush_command_queue(true); + m_ui_renderer->remove_temp_resources(); + 
m_overlay_cleanup_requests.clear(); + } + else if (m_custom_ui) { if (!in_begin_end && native_ui_flip_request.load()) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 976e02306a..4ecb9daa2c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -43,6 +43,7 @@ struct command_buffer_chunk: public vk::command_buffer std::atomic_bool pending = { false }; std::atomic last_sync = { 0 }; + std::mutex guard_mutex; command_buffer_chunk() {} @@ -84,8 +85,13 @@ struct command_buffer_chunk: public vk::command_buffer { if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS) { - vkResetFences(m_device, 1, &submit_fence); - pending = false; + std::lock_guard lock(guard_mutex); + + if (pending) + { + vkResetFences(m_device, 1, &submit_fence); + pending = false; + } } return !pending; @@ -93,6 +99,8 @@ struct command_buffer_chunk: public vk::command_buffer void wait() { + std::lock_guard lock(guard_mutex); + if (!pending) return; @@ -116,6 +124,114 @@ struct occlusion_data command_buffer_chunk* command_buffer_to_wait = nullptr; }; +struct frame_context_t +{ + VkSemaphore present_semaphore = VK_NULL_HANDLE; + VkDescriptorSet descriptor_set = VK_NULL_HANDLE; + vk::descriptor_pool descriptor_pool; + u32 used_descriptors = 0; + + std::vector> buffer_views_to_clean; + std::vector> samplers_to_clean; + + u32 present_image = UINT32_MAX; + command_buffer_chunk* swap_command_buffer = nullptr; + + //Heap pointers + s64 attrib_heap_ptr = 0; + s64 ubo_heap_ptr = 0; + s64 index_heap_ptr = 0; + s64 texture_upload_heap_ptr = 0; + + u64 last_frame_sync_time = 0; + + //Copy shareable information + void grab_resources(frame_context_t &other) + { + present_semaphore = other.present_semaphore; + descriptor_set = other.descriptor_set; + descriptor_pool = other.descriptor_pool; + used_descriptors = other.used_descriptors; + + attrib_heap_ptr = other.attrib_heap_ptr; + ubo_heap_ptr = other.attrib_heap_ptr; + index_heap_ptr = 
other.attrib_heap_ptr; + texture_upload_heap_ptr = other.texture_upload_heap_ptr; + } + + //Exchange storage (non-copyable) + void swap_storage(frame_context_t &other) + { + std::swap(buffer_views_to_clean, other.buffer_views_to_clean); + std::swap(samplers_to_clean, other.samplers_to_clean); + } + + void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc) + { + attrib_heap_ptr = attrib_loc; + ubo_heap_ptr = ubo_loc; + index_heap_ptr = index_loc; + texture_upload_heap_ptr = texture_loc; + + last_frame_sync_time = get_system_time(); + } + + void reset_heap_ptrs() + { + last_frame_sync_time = 0; + } +}; + +struct flush_request_task +{ + atomic_t pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request + atomic_t num_waiters{ 0 }; //Number of threads waiting for this request to be serviced + bool hard_sync = false; + + flush_request_task(){} + + void post(bool _hard_sync) + { + hard_sync = (hard_sync || _hard_sync); + pending_state = true; + num_waiters++; + } + + void remove_one() + { + num_waiters--; + } + + void clear_pending_flag() + { + hard_sync = false; + pending_state.store(false); + } + + bool pending() const + { + return pending_state.load(); + } + + void consumer_wait() const + { + while (num_waiters.load() != 0) + { + _mm_lfence(); + _mm_pause(); + } + } + + void producer_wait() const + { + while (pending_state.load()) + { + _mm_lfence(); + _mm_pause(); + } + } +}; + class VKGSRender : public GSRender { private: @@ -191,64 +307,6 @@ private: vk::vk_data_heap m_index_buffer_ring_info; vk::vk_data_heap m_texture_upload_buffer_ring_info; - struct frame_context_t - { - VkSemaphore present_semaphore = VK_NULL_HANDLE; - VkDescriptorSet descriptor_set = VK_NULL_HANDLE; - vk::descriptor_pool descriptor_pool; - u32 used_descriptors = 0; - - std::vector> buffer_views_to_clean; - std::vector> samplers_to_clean; - - u32 present_image = UINT32_MAX; - command_buffer_chunk* swap_command_buffer = nullptr; - 
- //Heap pointers - s64 attrib_heap_ptr = 0; - s64 ubo_heap_ptr = 0; - s64 index_heap_ptr = 0; - s64 texture_upload_heap_ptr = 0; - - u64 last_frame_sync_time = 0; - - //Copy shareable information - void grab_resources(frame_context_t &other) - { - present_semaphore = other.present_semaphore; - descriptor_set = other.descriptor_set; - descriptor_pool = other.descriptor_pool; - used_descriptors = other.used_descriptors; - - attrib_heap_ptr = other.attrib_heap_ptr; - ubo_heap_ptr = other.attrib_heap_ptr; - index_heap_ptr = other.attrib_heap_ptr; - texture_upload_heap_ptr = other.texture_upload_heap_ptr; - } - - //Exchange storage (non-copyable) - void swap_storage(frame_context_t &other) - { - std::swap(buffer_views_to_clean, other.buffer_views_to_clean); - std::swap(samplers_to_clean, other.samplers_to_clean); - } - - void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc) - { - attrib_heap_ptr = attrib_loc; - ubo_heap_ptr = ubo_loc; - index_heap_ptr = index_loc; - texture_upload_heap_ptr = texture_loc; - - last_frame_sync_time = get_system_time(); - } - - void reset_heap_ptrs() - { - last_frame_sync_time = 0; - } - }; - std::array frame_context_storage; //Temp frame context to use if the real frame queue is overburdened. 
Only used for storage frame_context_t m_aux_frame_context; @@ -277,8 +335,7 @@ private: std::atomic m_last_flushable_cb = {-1 }; std::mutex m_flush_queue_mutex; - std::atomic m_flush_commands = { false }; - std::atomic m_queued_threads = { 0 }; + flush_request_task m_flush_requests; std::thread::id rsx_thread; std::atomic m_last_sync_event = { 0 }; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index da8e9a498d..0d8dad1ca2 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -13,8 +13,8 @@ namespace vk VkSampler g_null_sampler = nullptr; - bool g_cb_no_interrupt_flag = false; - bool g_drv_no_primitive_restart_flag = false; + atomic_t g_cb_no_interrupt_flag { false }; + atomic_t g_drv_no_primitive_restart_flag { false }; u64 g_num_processed_frames = 0; u64 g_num_total_frames = 0;