diff --git a/rpcs3/Emu/RSX/VK/VKAsyncScheduler.cpp b/rpcs3/Emu/RSX/VK/VKAsyncScheduler.cpp
index 4dd6bb7cec..d1e72e4214 100644
--- a/rpcs3/Emu/RSX/VK/VKAsyncScheduler.cpp
+++ b/rpcs3/Emu/RSX/VK/VKAsyncScheduler.cpp
@@ -30,6 +30,17 @@ namespace vk
 	{
 		for (auto&& job : m_event_queue.pop_all())
 		{
+			if (job->type == xqueue_event_type::barrier)
+			{
+				// Blocks the queue from progressing until the work items are actually submitted to the GPU
+				// Avoids spamming the GPU with event requests when the events have not even been submitted yet
+				while (job->completion_eid == m_submit_count.load())
+				{
+					thread_ctrl::wait_for(100);
+				}
+				continue;
+			}
+
 			vk::wait_for_event(job->queue1_signal.get(), GENERAL_WAIT_TIMEOUT);
 			job->queue2_signal->host_signal();
 		}
@@ -62,7 +73,12 @@ namespace vk
 		{
 			auto ev1 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
 			auto ev2 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
-			m_events_pool.emplace_back(ev1, ev2, 0ull);
+			m_events_pool.emplace_back(ev1, ev2, 0ull, i);
+		}
+
+		for (usz i = 0; i < VK_MAX_ASYNC_COMPUTE_QUEUES; ++i)
+		{
+			m_barriers_pool.emplace_back(0ull, 0xFFFF0000 + i);
 		}
 	}
 
@@ -80,6 +96,7 @@ namespace vk
 
 		ensure(sync_label->completion_eid <= vk::last_completed_event_id());
 
+		m_sync_label_debug_uid = sync_label->uid;
 		sync_label->queue1_signal->reset();
 		sync_label->queue2_signal->reset();
 		sync_label->completion_eid = vk::current_event_id();
@@ -143,6 +160,11 @@ namespace vk
 			}
 		}
 
+		// 3. Insert a barrier for this CB. A job is about to be scheduled on it immediately.
+		auto barrier = &m_barriers_pool[m_next_cb_index];
+		barrier->completion_eid = m_submit_count;
+		m_event_queue.push(barrier);
+
 		m_next_cb_index++;
 		return m_current_cb;
 	}
@@ -160,6 +182,11 @@ namespace vk
 		return std::exchange(m_sync_label, nullptr);
 	}
 
+	u64 AsyncTaskScheduler::get_primary_sync_label_debug_uid()
+	{
+		return std::exchange(m_sync_label_debug_uid, ~0ull);
+	}
+
 	void AsyncTaskScheduler::flush(VkBool32 force_flush, VkSemaphore wait_semaphore, VkPipelineStageFlags wait_dst_stage_mask)
 	{
 		if (!m_current_cb)
@@ -176,6 +203,9 @@
 		m_current_cb->end();
 		m_current_cb->submit(get_current_renderer()->get_transfer_queue(), wait_semaphore, VK_NULL_HANDLE, nullptr, wait_dst_stage_mask, force_flush);
 
+		m_submit_count++;
+		thread_ctrl::notify(g_fxo->get<async_scheduler_thread>());
+
 		m_last_used_cb = m_current_cb;
 		m_current_cb = nullptr;
 		m_sync_required = false;
diff --git a/rpcs3/Emu/RSX/VK/VKAsyncScheduler.h b/rpcs3/Emu/RSX/VK/VKAsyncScheduler.h
index 4a27657d9b..73d6d223ae 100644
--- a/rpcs3/Emu/RSX/VK/VKAsyncScheduler.h
+++ b/rpcs3/Emu/RSX/VK/VKAsyncScheduler.h
@@ -9,15 +9,32 @@
 
 namespace vk
 {
+	enum class xqueue_event_type
+	{
+		label,
+		barrier
+	};
+
 	struct xqueue_event
 	{
+		// Type
+		xqueue_event_type type;
+
+		// Payload
 		std::unique_ptr<event> queue1_signal;
 		std::unique_ptr<event> queue2_signal;
-		u64 completion_eid;
 
-		xqueue_event(): completion_eid(0) {}
-		xqueue_event(std::unique_ptr<event>& trigger, std::unique_ptr<event>& payload, u64 eid)
-			: queue1_signal(std::move(trigger)), queue2_signal(std::move(payload)), completion_eid(eid)
+		// Identifiers
+		u64 completion_eid;
+		u64 uid;
+
+		xqueue_event(u64 eid, u64 _uid)
+			: type(xqueue_event_type::barrier), completion_eid(eid), uid(_uid)
+		{}
+
+		xqueue_event(std::unique_ptr<event>& trigger, std::unique_ptr<event>& payload, u64 eid, u64 _uid)
+			: type(xqueue_event_type::label), queue1_signal(std::move(trigger)), queue2_signal(std::move(payload)),
+			  completion_eid(eid), uid(_uid)
 		{}
 	};
 
@@ -31,6 +48,8 @@ namespace vk
 		command_buffer* m_last_used_cb = nullptr;
 		command_buffer* m_current_cb = nullptr;
 		usz m_next_cb_index = 0;
+		std::vector<xqueue_event> m_barriers_pool;
+		atomic_t<u64> m_submit_count = 0;
 
 		// Scheduler
 		shared_mutex m_config_mutex;
@@ -40,6 +59,7 @@ namespace vk
 		// Sync
 		event* m_sync_label = nullptr;
 		atomic_t<bool> m_sync_required = false;
+		u64 m_sync_label_debug_uid = 0;
 
 		static constexpr u32 events_pool_size = 16384;
 		std::vector<xqueue_event> m_events_pool;
@@ -58,6 +78,7 @@ namespace vk
 
 		command_buffer* get_current();
 		event* get_primary_sync_label();
+		u64 get_primary_sync_label_debug_uid();
 		void flush(VkBool32 force_flush, VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkPipelineStageFlags wait_dst_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
 		void kill();
 
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index 62cf3dca70..214850ada4 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -1,8 +1,9 @@
 #pragma once
 
+#include "VKAsyncScheduler.h"
+#include "VKDMA.h"
 #include "VKRenderTargets.h"
 #include "VKResourceManager.h"
-#include "VKDMA.h"
 #include "vkutils/image_helpers.h"
 
 #include "../Common/texture_cache.h"
@@ -1062,6 +1063,10 @@ namespace vk
 
 			if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
 			{
+				// Flush any pending async jobs in case of blockers
+				// TODO: Context-level manager should handle this logic
+				g_fxo->get<async_scheduler_thread>().flush(VK_TRUE);
+
 				// Primary access command queue, must restart it after
 				vk::fence submit_fence(*m_device);
 				cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, &submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE);
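
A note on the synchronization pattern this patch introduces: `get_current()` stamps a pooled barrier event with the current value of `m_submit_count` and pushes it into the event queue ahead of the label events that will be recorded on the new command buffer, while `flush()` increments that counter after the actual queue submission. The consumer thread therefore parks whenever it reaches a barrier whose `completion_eid` still equals the live counter, which is exactly the "do not poll GPU events for unsubmitted work" guarantee described in the comments. The sketch below reproduces just that handshake using only the standard library; it is not RPCS3 code, and every name in it (`g_submit_count`, `consumer_loop`, `queue_event`, ...) is an illustrative stand-in: a mutex-guarded `std::deque` replaces the lock-free `m_event_queue`, and `sleep_for` replaces `thread_ctrl::wait_for()`.

```cpp
// Minimal standalone sketch of the submit-count barrier handshake.
// All names are invented for illustration; see the note above.
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <deque>
#include <mutex>
#include <thread>

enum class event_type { label, barrier };

struct queue_event
{
	event_type type;
	uint64_t completion_eid; // for barriers: submit count captured at enqueue time
};

std::atomic<uint64_t> g_submit_count{0};
std::mutex g_queue_lock;
std::deque<queue_event*> g_event_queue; // stand-in for the lock-free m_event_queue

void consumer_loop(std::atomic<bool>& stop)
{
	while (!stop)
	{
		queue_event* job = nullptr;
		{
			std::lock_guard lk(g_queue_lock);
			if (!g_event_queue.empty())
			{
				job = g_event_queue.front();
				g_event_queue.pop_front();
			}
		}

		if (!job)
		{
			std::this_thread::sleep_for(std::chrono::microseconds(100));
			continue;
		}

		if (job->type == event_type::barrier)
		{
			// Same rule as the patch: everything queued after this barrier was
			// recorded on a command buffer that remains unsubmitted until the
			// producer bumps the counter, so park instead of polling the GPU.
			while (job->completion_eid == g_submit_count.load())
			{
				std::this_thread::sleep_for(std::chrono::microseconds(100));
			}
			continue;
		}

		// A label job would be waited on here (vk::wait_for_event in the patch).
		std::printf("processed label event\n");
	}
}

int main()
{
	std::atomic<bool> stop{false};
	std::thread consumer(consumer_loop, std::ref(stop));

	// Producer side, mirroring get_current() + flush():
	// 1. barrier stamped with the pre-submit counter value
	// 2. label job for the freshly recorded command buffer
	// 3. "submission" bumps the counter and releases the barrier
	queue_event barrier{event_type::barrier, g_submit_count.load()};
	queue_event label{event_type::label, 0};
	{
		std::lock_guard lk(g_queue_lock);
		g_event_queue.push_back(&barrier);
		g_event_queue.push_back(&label);
	}
	g_submit_count++;

	std::this_thread::sleep_for(std::chrono::milliseconds(10));
	stop = true;
	consumer.join();
}
```

The detail worth noting is that the barrier is stamped before submission, so "counter still equals my stamp" is equivalent to "my command buffer has not been submitted yet", and a single atomic increment in `flush()` releases the consumer with no extra locking on the hot path.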