vk: Spec-compliant async compute
parent 7895d43a98
commit cef512a123

21 changed files with 381 additions and 250 deletions
@@ -10,7 +10,7 @@

 namespace vk
 {
-    void AsyncTaskScheduler::operator()()
+    AsyncTaskScheduler::AsyncTaskScheduler(vk_gpu_scheduler_mode mode)
     {
         if (g_cfg.video.renderer != video_renderer::vulkan || !g_cfg.video.vk.asynchronous_texture_streaming)
         {
@@ -21,40 +21,15 @@ namespace vk
         }

         init_config_options();
-
-        if (!m_use_host_scheduler)
-        {
-            // No need to keep the GPU alive using a CPU thread.
-            rsx_log.notice("Host scheduler is disabled. This thread will now exit.");
-            return;
-        }
     }

-    // If this thread is unavailable for too long, your GPU will hard crash and force a full reset
-    // TODO: Investigate if this can be executed outside the application context. Attach a debugger to rpcs3 and boom - GPU reset. Not fun rebooting so often.
-    thread_ctrl::set_native_priority(1);
-
-    add_ref();
-
-    while (thread_ctrl::state() != thread_state::aborting)
+    AsyncTaskScheduler::~AsyncTaskScheduler()
     {
-        for (auto&& job : m_event_queue.pop_all())
+        if (!m_async_command_queue.empty())
         {
-            if (job->type == xqueue_event_type::barrier)
-            {
-                // Blocks the queue from progressing until the work items are actually submitted to the GPU
-                // Avoids spamming the GPU with event requests when the events have not even been submitted yet
-                while (job->completion_eid == m_submit_count.load())
-                {
-                    thread_ctrl::wait_for(100);
-                }
-                continue;
-            }
-
-            vk::wait_for_event(job->queue1_signal.get(), GENERAL_WAIT_TIMEOUT);
-            job->queue2_signal->host_signal();
+            // Driver resources should be destroyed before driver is detached or you get crashes. RAII won't save you here.
+            rsx_log.error("Async task scheduler resources were not freed correctly!");
         }
-    }
-
-    release();
     }

@@ -66,8 +41,8 @@ namespace vk
         return;
     }

-    m_use_host_scheduler = g_cfg.video.vk.asynchronous_scheduler == vk_gpu_scheduler_mode::host || g_cfg.video.strict_rendering_mode;
-    rsx_log.notice("Asynchronous task scheduler is active running in %s mode", m_use_host_scheduler? "'Host'" : "'Device'");
+    m_use_host_scheduler = g_cfg.video.vk.asynchronous_scheduler == vk_gpu_scheduler_mode::safe || g_cfg.video.strict_rendering_mode;
+    rsx_log.notice("Asynchronous task scheduler is active running in %s mode", m_use_host_scheduler? "'Safe'" : "'Fast'");
 }

 void AsyncTaskScheduler::delayed_init()
@@ -77,58 +52,32 @@ namespace vk
     auto pdev = get_current_renderer();
     m_command_pool.create(*const_cast<render_device*>(pdev), pdev->get_transfer_queue_family());

+    if (m_use_host_scheduler)
+    {
     for (usz i = 0; i < events_pool_size; ++i)
     {
-        auto ev1 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
-        auto ev2 = std::make_unique<event>(*get_current_renderer(), sync_domain::gpu);
-        m_events_pool.emplace_back(ev1, ev2, 0ull, i);
+        auto sema = std::make_unique<semaphore>(*pdev);
+        m_semaphore_pool.emplace_back(std::move(sema));
     }

-    for (usz i = 0; i < VK_MAX_ASYNC_COMPUTE_QUEUES; ++i)
+        return;
+    }
+
+    for (usz i = 0; i < events_pool_size; ++i)
     {
-        m_barriers_pool.emplace_back(0ull, 0xFFFF0000 + i);
+        auto ev = std::make_unique<vk::event>(*pdev, sync_domain::gpu);
+        m_events_pool.emplace_back(std::move(ev));
     }
 }

 void AsyncTaskScheduler::insert_sync_event()
 {
     ensure(m_current_cb);
-    ensure(m_next_event_id < events_pool_size);
-    auto sync_label = &m_events_pool[m_next_event_id];
+    auto& sync_label = m_events_pool[m_next_event_id++ % events_pool_size];

-    if (++m_next_event_id == events_pool_size)
-    {
-        // Wrap
-        m_next_event_id = 0;
-    }
-
-    ensure(sync_label->completion_eid <= vk::last_completed_event_id());
-
-    m_sync_label_debug_uid = sync_label->uid;
-    sync_label->queue1_signal->reset();
-    sync_label->queue2_signal->reset();
-    sync_label->completion_eid = vk::current_event_id();
-
-    sync_label->queue1_signal->signal(*m_current_cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0);
-
-    if (m_use_host_scheduler)
-    {
-        m_event_queue.push(sync_label);
-        m_sync_label = sync_label->queue2_signal.get();
-    }
-    else
-    {
-        m_sync_label = sync_label->queue1_signal.get();
-    }
-}
-
-AsyncTaskScheduler::~AsyncTaskScheduler()
-{
-    if (!m_async_command_queue.empty())
-    {
-        // Driver resources should be destroyed before driver is detached or you get crashes. RAII won't save you here.
-        rsx_log.error("Async task scheduler resources were not freed correctly!");
-    }
+    sync_label->reset();
+    sync_label->signal(*m_current_cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0);
+    m_sync_label = sync_label.get();
 }

 command_buffer* AsyncTaskScheduler::get_current()
@@ -168,17 +117,14 @@ namespace vk
         }
     }

-    // 3. Insert a barrier for this CB. A job is about to be scheduled on it immediately.
-    auto barrier = &m_barriers_pool[m_next_cb_index];
-    barrier->completion_eid = m_submit_count;
-    m_event_queue.push(barrier);
-
     m_next_cb_index++;
     return m_current_cb;
 }

 event* AsyncTaskScheduler::get_primary_sync_label()
 {
+    ensure(!m_use_host_scheduler);
+
     if (m_sync_required) [[unlikely]]
     {
         std::lock_guard lock(m_submit_mutex); // For some reason this is inexplicably expensive. WTF!
@@ -190,40 +136,45 @@ namespace vk
     return std::exchange(m_sync_label, nullptr);
 }

-u64 AsyncTaskScheduler::get_primary_sync_label_debug_uid()
+semaphore* AsyncTaskScheduler::get_sema()
 {
-    return std::exchange(m_sync_label_debug_uid, ~0ull);
+    if (m_semaphore_pool.empty())
+    {
+        delayed_init();
+        ensure(!m_semaphore_pool.empty());
+    }
+
+    const u32 sema_id = (m_next_semaphore_id++ % m_semaphore_pool.size());
+    return m_semaphore_pool[sema_id].get();
 }

-void AsyncTaskScheduler::flush(VkBool32 force_flush, VkSemaphore wait_semaphore, VkPipelineStageFlags wait_dst_stage_mask)
+void AsyncTaskScheduler::flush(queue_submit_t& submit_info, VkBool32 force_flush)
 {
     if (!m_current_cb)
     {
         return;
     }

+    submit_info.queue = get_current_renderer()->get_transfer_queue();
+
     std::lock_guard lock(m_submit_mutex);
-    if (m_sync_required)
+    if (m_sync_required && !m_use_host_scheduler)
     {
         insert_sync_event();
     }

     m_current_cb->end();
-    m_current_cb->submit(get_current_renderer()->get_transfer_queue(), wait_semaphore, VK_NULL_HANDLE, nullptr, wait_dst_stage_mask, force_flush);
+    m_current_cb->submit(submit_info, force_flush);

     m_submit_count++;
-    thread_ctrl::notify(g_fxo->get<async_scheduler_thread>());

     m_last_used_cb = m_current_cb;
     m_current_cb = nullptr;
     m_sync_required = false;
 }

-void AsyncTaskScheduler::kill()
+void AsyncTaskScheduler::destroy()
 {
-    g_fxo->get<async_scheduler_thread>() = thread_state::aborting;
-    while (has_refs());
-
     for (auto& cb : m_async_command_queue)
     {
         cb.destroy();
@@ -233,5 +184,6 @@ namespace vk
     m_next_cb_index = 0;
     m_command_pool.destroy();
     m_events_pool.clear();
+    m_semaphore_pool.clear();
 }
 }
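The hunks above (the scheduler implementation, VKAsyncScheduler.cpp in the RPCS3 tree) remove the scheduler's thread entry point entirely. The reason is worth spelling out: a VkEvent can only order work within a single queue, so the old design needed a host thread to observe the transfer queue's event and mirror it onto a second event for consumers. The spec-defined primitive for ordering work across queues is VkSemaphore, signaled and waited at submit boundaries; the diff keeps the event path for 'fast' mode and routes 'safe' mode through semaphores. A minimal sketch of the two mechanisms, with illustrative names (not RPCS3 API):

// Fast/device mode: a VkEvent is signaled inside the async command buffer and
// waited on later. VkEvent has no cross-queue semantics, which is why this
// path is not fully spec-compliant when producer and consumer live on
// different queues.
#include <vulkan/vulkan.h>

void signal_event_on_gpu(VkCommandBuffer async_cb, VkEvent ev)
{
    vkCmdSetEvent(async_cb, ev, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
}

// Safe/host mode: cross-queue ordering is expressed with a semaphore at
// submit time, the mechanism the spec actually defines for inter-queue
// dependencies.
void submit_chained(VkQueue q1, VkCommandBuffer cb1, VkQueue q2, VkCommandBuffer cb2, VkSemaphore sema)
{
    VkSubmitInfo s1{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
    s1.commandBufferCount = 1;
    s1.pCommandBuffers = &cb1;
    s1.signalSemaphoreCount = 1;
    s1.pSignalSemaphores = &sema;
    vkQueueSubmit(q1, 1, &s1, VK_NULL_HANDLE);

    VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
    VkSubmitInfo s2{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
    s2.waitSemaphoreCount = 1;
    s2.pWaitSemaphores = &sema;
    s2.pWaitDstStageMask = &wait_stage;
    s2.commandBufferCount = 1;
    s2.pCommandBuffers = &cb2;
    vkQueueSubmit(q2, 1, &s2, VK_NULL_HANDLE);
}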
@@ -9,36 +9,7 @@

 namespace vk
 {
-    enum class xqueue_event_type
-    {
-        label,
-        barrier
-    };
-
-    struct xqueue_event
-    {
-        // Type
-        xqueue_event_type type;
-
-        // Payload
-        std::unique_ptr<event> queue1_signal;
-        std::unique_ptr<event> queue2_signal;
-
-        // Identifiers
-        u64 completion_eid;
-        u64 uid;
-
-        xqueue_event(u64 eid, u64 _uid)
-            : type(xqueue_event_type::barrier), completion_eid(eid), uid(_uid)
-        {}
-
-        xqueue_event(std::unique_ptr<event>& trigger, std::unique_ptr<event>& payload, u64 eid, u64 _uid)
-            : type(xqueue_event_type::label), queue1_signal(std::move(trigger)), queue2_signal(std::move(payload)),
-              completion_eid(eid), uid(_uid)
-        {}
-    };
-
-    class AsyncTaskScheduler : private rsx::ref_counted
+    class AsyncTaskScheduler
     {
         // Vulkan resources
         std::vector<command_buffer> m_async_command_queue;
@@ -48,7 +19,6 @@ namespace vk
         command_buffer* m_last_used_cb = nullptr;
         command_buffer* m_current_cb = nullptr;
         usz m_next_cb_index = 0;
-        std::vector<xqueue_event> m_barriers_pool;
         atomic_t<u64> m_submit_count = 0;

         // Scheduler
@@ -59,13 +29,14 @@ namespace vk
         // Sync
         event* m_sync_label = nullptr;
         atomic_t<bool> m_sync_required = false;
-        u64 m_sync_label_debug_uid = 0;

         static constexpr u32 events_pool_size = 16384;
-        std::vector<xqueue_event> m_events_pool;
-        atomic_t<u32> m_next_event_id = 0;
+        std::vector<std::unique_ptr<vk::event>> m_events_pool;
+        atomic_t<u64> m_next_event_id = 0;

-        lf_queue<xqueue_event*> m_event_queue;
+        std::vector<std::unique_ptr<vk::semaphore>> m_semaphore_pool;
+        atomic_t<u64> m_next_semaphore_id = 0;
+
         shared_mutex m_submit_mutex;

         void init_config_options();
@@ -73,21 +44,18 @@ namespace vk
         void insert_sync_event();

     public:
-        AsyncTaskScheduler(const std::string_view& name) : thread_name(name) {} // This ctor stops default initialization by fxo
+        AsyncTaskScheduler(vk_gpu_scheduler_mode mode); // This ctor stops default initialization by fxo
         ~AsyncTaskScheduler();

         command_buffer* get_current();
         event* get_primary_sync_label();
-        u64 get_primary_sync_label_debug_uid();
+        semaphore* get_sema();

-        void flush(VkBool32 force_flush, VkSemaphore wait_semaphore = VK_NULL_HANDLE, VkPipelineStageFlags wait_dst_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-        void kill();
+        void flush(queue_submit_t& submit_info, VkBool32 force_flush);
+        void destroy();

-        // Thread entry-point
-        void operator()();
-
-        const std::string_view thread_name;
+        // Inline getters
+        inline bool is_recording() const { return m_current_cb != nullptr; }
+        inline bool is_host_mode() const { return m_use_host_scheduler; }
     };
-
-    using async_scheduler_thread = named_thread<AsyncTaskScheduler>;
 }
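Both pools in this header (16384 events, and the semaphore pool) are consumed with a monotonically increasing atomic counter taken modulo the pool size, as seen in insert_sync_event() and get_sema() above. That is a simple wait-free ring allocator: slots are never returned explicitly, and correctness rests on the assumption that by the time the counter wraps, the GPU finished with the recycled slot long ago. A minimal sketch of the pattern:

// Ring-pool pattern: a monotonically increasing atomic index modulo a fixed
// pool size. Assumes entries are reused only long after the GPU is done with
// them (here: N outstanding sync points).
#include <array>
#include <atomic>
#include <cstdint>

template <typename T, std::size_t N>
class ring_pool
{
    std::array<T, N> m_pool{};
    std::atomic<std::uint64_t> m_next{ 0 };

public:
    // Wait-free: each caller gets a unique slot until the counter wraps the
    // pool, at which point the oldest slot is recycled.
    T& get()
    {
        const auto id = m_next.fetch_add(1, std::memory_order_relaxed);
        return m_pool[id % N];
    }
};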
@@ -24,17 +24,31 @@ namespace vk
     }

     FORCE_INLINE
-    static void queue_submit_impl(VkQueue queue, const VkSubmitInfo* info, fence* pfence)
+    static void queue_submit_impl(const queue_submit_t& submit_info)
     {
+        ensure(submit_info.fence);
         acquire_global_submit_lock();
-        vkQueueSubmit(queue, 1, info, pfence->handle);
+        VkSubmitInfo info
+        {
+            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+            .pNext = nullptr,
+            .waitSemaphoreCount = submit_info.wait_semaphores_count,
+            .pWaitSemaphores = submit_info.wait_semaphores.data(),
+            .pWaitDstStageMask = submit_info.wait_stages.data(),
+            .commandBufferCount = 1,
+            .pCommandBuffers = &submit_info.commands,
+            .signalSemaphoreCount = submit_info.signal_semaphores_count,
+            .pSignalSemaphores = submit_info.signal_semaphores.data()
+        };
+
+        vkQueueSubmit(submit_info.queue, 1, &info, submit_info.fence->handle);
         release_global_submit_lock();

         // Signal fence
-        pfence->signal_flushed();
+        submit_info.fence->signal_flushed();
     }

-    void queue_submit(VkQueue queue, const VkSubmitInfo* info, fence* pfence, VkBool32 flush)
+    void queue_submit(const queue_submit_t& submit_info, VkBool32 flush)
     {
         // Access to this method must be externally synchronized.
         // Offloader is guaranteed to never call this for async flushes.
@@ -42,18 +56,18 @@ namespace vk

         if (!flush && g_cfg.video.multithreaded_rsx)
         {
-            auto packet = new submit_packet(queue, pfence, info);
+            auto packet = new queue_submit_t(submit_info);
             g_fxo->get<rsx::dma_manager>().backend_ctrl(rctrl_queue_submit, packet);
         }
         else
         {
-            queue_submit_impl(queue, info, pfence);
+            queue_submit_impl(submit_info);
         }
     }

-    void queue_submit(const vk::submit_packet* packet)
+    void queue_submit(const queue_submit_t* packet)
     {
         // Flush-only version used by asynchronous submit processing (MTRSX)
-        queue_submit_impl(packet->queue, &packet->submit_info, packet->pfence);
+        queue_submit_impl(*packet);
     }
 }
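A key property of this hunk: the queue_submit_t packet is now self-contained, so the VkSubmitInfo can be built at the last moment, including on the MTRSX offload thread. vkQueueSubmit requires host-side synchronization on the queue it targets, which is why every path funnels through the global submit lock. A sketch of the idea, assuming a single process-wide mutex guards all queues (names are illustrative):

// vkQueueSubmit demands external synchronization of the VkQueue, so
// concurrent submitters (render thread, offload thread) must serialize.
#include <mutex>
#include <vulkan/vulkan.h>

static std::mutex g_submit_mutex;

VkResult locked_queue_submit(VkQueue queue, const VkSubmitInfo& info, VkFence fence)
{
    std::lock_guard lock(g_submit_mutex);
    return vkQueueSubmit(queue, 1, &info, fence);
}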
@@ -380,6 +380,29 @@ void VKGSRender::load_texture_env()
             m_cached_renderpass = VK_NULL_HANDLE;
         }
     }

+    if (g_cfg.video.vk.asynchronous_texture_streaming)
+    {
+        // We have to do this here, because we have to assume the CB will be dumped
+        auto& async_task_scheduler = g_fxo->get<vk::AsyncTaskScheduler>();
+
+        if (async_task_scheduler.is_recording())
+        {
+            if (async_task_scheduler.is_host_mode())
+            {
+                flush_command_queue();
+                ensure(!async_task_scheduler.is_recording());
+            }
+            else
+            {
+                // Sync any async scheduler tasks
+                if (auto ev = async_task_scheduler.get_primary_sync_label())
+                {
+                    ev->gpu_wait(*m_current_command_buffer);
+                }
+            }
+        }
+    }
 }

 bool VKGSRender::bind_texture_env()
@@ -1029,12 +1052,6 @@ void VKGSRender::end()
     load_program_env();
     m_frame_stats.setup_time += m_profiler.duration();

-    // Sync any async scheduler tasks
-    if (auto ev = g_fxo->get<vk::async_scheduler_thread>().get_primary_sync_label())
-    {
-        ev->gpu_wait(*m_current_command_buffer);
-    }
-
     for (int binding_attempts = 0; binding_attempts < 3; binding_attempts++)
     {
         bool out_of_memory;
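In fast mode the primary command buffer stalls GPU-side on the async queue's event label before sampling freshly streamed textures. A sketch of what such a wait looks like, assuming gpu_wait() wraps vkCmdWaitEvents (an assumption; the RPCS3 wrapper may add its own memory barriers or stage masks). The srcStageMask must cover the stage used when the event was set, TOP_OF_PIPE per the scheduler's signal() call above:

#include <vulkan/vulkan.h>

void wait_for_label(VkCommandBuffer primary_cb, VkEvent label)
{
    // Block later commands in this command buffer until 'label' is signaled.
    vkCmdWaitEvents(primary_cb, 1, &label,
                    VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,   // srcStageMask
                    VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,  // dstStageMask
                    0, nullptr, 0, nullptr, 0, nullptr); // no extra barriers
}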
@@ -575,18 +575,13 @@ VKGSRender::VKGSRender() : GSRender()
     {
     case vk::driver_vendor::NVIDIA:
         if (auto chip_family = vk::get_chip_family();
-            chip_family == vk::chip_class::NV_kepler ||
-            chip_family == vk::chip_class::NV_maxwell)
+            chip_family == vk::chip_class::NV_kepler || chip_family == vk::chip_class::NV_maxwell)
         {
-            rsx_log.error("Older NVIDIA cards do not meet requirements for asynchronous compute due to some driver fakery.");
-            backend_config.supports_asynchronous_compute = false;
-        }
-        else // Workaround. Remove once the async decoder is re-written
-        {
-            // NVIDIA 471 and newer are completely borked. Queue priority is not observed and any queue waiting on another just causes deadlock.
-            rsx_log.error("NVIDIA GPUs are incompatible with the current implementation of asynchronous texture decoding.");
-            backend_config.supports_asynchronous_compute = false;
+            rsx_log.warning("Older NVIDIA cards do not meet requirements for true asynchronous compute due to some driver fakery.");
         }

+        rsx_log.notice("Forcing safe async compute for NVIDIA device to avoid crashing.");
+        g_cfg.video.vk.asynchronous_scheduler.set(vk_gpu_scheduler_mode::safe);
         break;
 #if !defined(_WIN32)
     // Anything running on AMDGPU kernel driver will not work due to the check for fd-backed memory allocations
@@ -614,7 +609,7 @@ VKGSRender::VKGSRender() : GSRender()
         if (backend_config.supports_asynchronous_compute)
         {
             // Run only if async compute can be used.
-            g_fxo->init<vk::async_scheduler_thread>("Vulkan Async Scheduler"sv);
+            g_fxo->init<vk::AsyncTaskScheduler>(g_cfg.video.vk.asynchronous_scheduler);
         }
     }
 }
@@ -627,21 +622,24 @@ VKGSRender::~VKGSRender()
         return;
     }

-    // Globals. TODO: Refactor lifetime management
-    if (backend_config.supports_asynchronous_compute)
+    // Flush DMA queue
+    while (!g_fxo->get<rsx::dma_manager>().sync())
     {
-        g_fxo->get<vk::async_scheduler_thread>().kill();
+        do_local_task(rsx::FIFO_state::lock_wait);
     }

     //Wait for device to finish up with resources
     vkDeviceWaitIdle(*m_device);

+    // Globals. TODO: Refactor lifetime management
+    if (backend_config.supports_asynchronous_compute)
+    {
+        g_fxo->get<vk::AsyncTaskScheduler>().destroy();
+    }
+
     // Clear flush requests
     m_flush_requests.clear_pending_flag();

-    // Texture cache
-    m_texture_cache.destroy();
-
     // Shaders
     vk::destroy_pipe_compiler(); // Ensure no pending shaders being compiled
     vk::finalize_compiler_context(); // Shut down the glslang compiler
@@ -2064,9 +2062,6 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
     const bool sync_success = g_fxo->get<rsx::dma_manager>().sync();
     const VkBool32 force_flush = !sync_success;

-    // Flush any asynchronously scheduled jobs
-    g_fxo->get<vk::async_scheduler_thread>().flush(force_flush);
-
     if (vk::test_status_interrupt(vk::heap_dirty))
     {
         if (m_attrib_ring_info.is_dirty() ||
@@ -2096,8 +2091,8 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore

         m_secondary_command_buffer.end();

-        m_secondary_command_buffer.submit(m_device->get_graphics_queue(),
-            VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, force_flush);
+        vk::queue_submit_t submit_info{ m_device->get_graphics_queue(), nullptr };
+        m_secondary_command_buffer.submit(submit_info, force_flush);
     }

     vk::clear_status_interrupt(vk::heap_dirty);
@@ -2128,8 +2123,54 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore
     m_current_command_buffer->end();
     m_current_command_buffer->tag();

-    m_current_command_buffer->submit(m_device->get_graphics_queue(),
-        wait_semaphore, signal_semaphore, pFence, pipeline_stage_flags, force_flush);
+    // Flush any asynchronously scheduled jobs
+    // So this is a bit trippy, but, in this case, the primary CB contains the 'release' operations, not the acquire ones.
+    // The CB that comes in after this submit will acquire the yielded resources automatically.
+    // This means the primary CB is the precursor to the async CB not the other way around.
+    // Async CB should wait for the primary CB to signal.
+    vk::queue_submit_t primary_submit_info{ m_device->get_graphics_queue(), pFence };
+    vk::queue_submit_t secondary_submit_info{};
+
+    if (wait_semaphore)
+    {
+        primary_submit_info.wait_on(wait_semaphore, pipeline_stage_flags);
+    }
+
+    if (const auto wait_sema = std::exchange(m_dangling_semaphore_signal, VK_NULL_HANDLE))
+    {
+        // TODO: Sync on VS stage
+        primary_submit_info.wait_on(wait_sema, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
+    }
+
+    auto& async_scheduler = g_fxo->get<vk::AsyncTaskScheduler>();
+    const bool require_secondary_flush = async_scheduler.is_recording();
+
+    if (async_scheduler.is_recording())
+    {
+        if (async_scheduler.is_host_mode())
+        {
+            // Inject dependency chain using semaphores.
+            // HEAD = externally synchronized.
+            // TAIL = insert dangling wait, from the async CB to the next CB down.
+            m_dangling_semaphore_signal = *async_scheduler.get_sema();
+            secondary_submit_info.queue_signal(m_dangling_semaphore_signal);
+
+            // Delay object destruction by one cycle
+            vk::get_resource_manager()->push_down_current_scope();
+        }
+    }
+
+    if (signal_semaphore)
+    {
+        primary_submit_info.queue_signal(signal_semaphore);
+    }
+
+    m_current_command_buffer->submit(primary_submit_info, force_flush);
+
+    if (require_secondary_flush)
+    {
+        async_scheduler.flush(secondary_submit_info, force_flush);
+    }
+
     if (force_flush)
     {
@@ -2367,7 +2408,7 @@ void VKGSRender::renderctl(u32 request_code, void* args)
     {
     case vk::rctrl_queue_submit:
     {
-        const auto packet = reinterpret_cast<vk::submit_packet*>(args);
+        const auto packet = reinterpret_cast<vk::queue_submit_t*>(args);
         vk::queue_submit(packet);
         free(packet);
         break;
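The close_and_submit_command_buffer() hunk is the heart of safe mode: the primary CB (holding the ownership-release barriers) is submitted first, the async CB is flushed after it and signals a "dangling" semaphore, and that semaphore is only consumed by the NEXT primary submit, at the fragment-shader stage. A self-contained sketch of the cross-cycle carry, with illustrative names:

#include <utility>
#include <vulkan/vulkan.h>

struct chain_state
{
    VkSemaphore dangling = VK_NULL_HANDLE; // signaled by the previous async CB
};

void submit_cycle(chain_state& st, VkQueue gfx, VkQueue xfer,
                  VkCommandBuffer primary_cb, VkCommandBuffer async_cb,
                  VkSemaphore pool_sema, VkFence frame_fence)
{
    VkSubmitInfo primary{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
    primary.commandBufferCount = 1;
    primary.pCommandBuffers = &primary_cb;

    // Consume the semaphore signaled by the previous cycle's async CB, so
    // this frame cannot reach its fragment shaders before those uploads end.
    VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    VkSemaphore prev = std::exchange(st.dangling, VK_NULL_HANDLE);
    if (prev)
    {
        primary.waitSemaphoreCount = 1;
        primary.pWaitSemaphores = &prev;
        primary.pWaitDstStageMask = &wait_stage;
    }

    // Primary first: it records the ownership 'release' barriers.
    vkQueueSubmit(gfx, 1, &primary, frame_fence);

    // Async CB second: acquires, uploads, then signals the dangling wait
    // picked up by the next cycle's primary submit.
    VkSubmitInfo secondary{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
    secondary.commandBufferCount = 1;
    secondary.pCommandBuffers = &async_cb;
    secondary.signalSemaphoreCount = 1;
    secondary.pSignalSemaphores = &pool_sema;
    vkQueueSubmit(xfer, 1, &secondary, VK_NULL_HANDLE);

    st.dangling = pool_sema;
}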
@@ -429,6 +429,7 @@ private:
     u32 m_current_cb_index = 0;
     std::array<vk::command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
     vk::command_buffer_chunk* m_current_command_buffer = nullptr;
+    VkSemaphore m_dangling_semaphore_signal = VK_NULL_HANDLE;

     VkDescriptorSetLayout descriptor_layouts;
     VkPipelineLayout pipeline_layout;
@@ -31,7 +31,7 @@ namespace vk
     class image;
     class instance;
     class render_device;
-    struct submit_packet;
+    struct queue_submit_t;

     enum runtime_state
     {
@@ -53,7 +53,7 @@ namespace vk
     // Sync helpers around vkQueueSubmit
     void acquire_global_submit_lock();
     void release_global_submit_lock();
-    void queue_submit(const vk::submit_packet* packet);
+    void queue_submit(const vk::queue_submit_t* packet);

     template<class T>
     T* get_compute_task();
@@ -194,6 +194,11 @@ namespace vk
             dispose(ptr);
         }

+        void push_down_current_scope()
+        {
+            get_current_eid_scope().eid++;
+        }
+
         void eid_completed(u64 eid)
         {
             while (!m_eid_map.empty())
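The resource manager bins disposed objects by an "event id" scope and frees a bin only once the GPU reports that eid complete. push_down_current_scope() raises the live scope's completion target, which parks everything disposed this cycle for one extra completion — long enough for the async CB that still references those objects to retire. A minimal sketch of the scheme, with illustrative names (the real manager keys scopes off vk::current_event_id()):

#include <cstdint>
#include <memory>
#include <vector>

struct disposable { virtual ~disposable() = default; };

class resource_manager
{
    struct eid_scope
    {
        std::uint64_t eid; // completion target for this garbage bin
        std::vector<std::unique_ptr<disposable>> garbage;
    };

    std::vector<eid_scope> m_scopes;

    eid_scope& get_current_eid_scope(std::uint64_t now)
    {
        if (m_scopes.empty() || m_scopes.back().eid < now)
            m_scopes.push_back({ now, {} });
        return m_scopes.back();
    }

public:
    void dispose(std::uint64_t now, std::unique_ptr<disposable> obj)
    {
        get_current_eid_scope(now).garbage.push_back(std::move(obj));
    }

    // Everything parked in the current scope now outlives one more eid.
    void push_down_current_scope(std::uint64_t now)
    {
        get_current_eid_scope(now).eid++;
    }

    void eid_completed(std::uint64_t eid)
    {
        // Destroy every bin whose target eid the GPU has passed.
        while (!m_scopes.empty() && m_scopes.front().eid <= eid)
            m_scopes.erase(m_scopes.begin());
    }
};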
@@ -811,7 +811,7 @@ namespace vk
     const vk::command_buffer* pcmd = nullptr;
     if (flags & image_upload_options::upload_contents_async)
     {
-        auto async_cmd = g_fxo->get<vk::async_scheduler_thread>().get_current();
+        auto async_cmd = g_fxo->get<AsyncTaskScheduler>().get_current();
         async_cmd->begin();
         pcmd = async_cmd;
@@ -832,9 +832,20 @@ namespace vk

     ensure(pcmd);

+    // Queue transfer stuff. Must release from primary if owned and acquire in secondary.
+    const bool need_queue_xfer = dst_image->current_layout != VK_IMAGE_LAYOUT_UNDEFINED && primary_cb.get_queue_family() != pcmd->get_queue_family();
+    if (need_queue_xfer)
+    {
+        dst_image->queue_release(primary_cb, pcmd->get_queue_family(), dst_image->current_layout);
+    }
+
     if (flags & image_upload_options::initialize_image_layout)
     {
-        dst_image->change_layout(*pcmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, pcmd->get_queue_family());
+        dst_image->change_layout(*pcmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+    }
+    else if (need_queue_xfer)
+    {
+        dst_image->queue_acquire(*pcmd, dst_image->current_layout);
     }

     return *pcmd;
@@ -1119,6 +1130,12 @@ namespace vk
     {
         vkCmdCopyBufferToImage(cmd2, upload_buffer->value, dst_image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<u32>(copy_regions.size()), copy_regions.data());
     }

+    if (cmd2.get_queue_family() != cmd.get_queue_family())
+    {
+        // Release from async chain, the primary chain will acquire later
+        dst_image->queue_release(cmd2, cmd.get_queue_family(), dst_image->current_layout);
+    }
 }

 void blitter::scale_image(vk::command_buffer& cmd, vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, const rsx::typeless_xfer& xfer_info)
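This is the "spec-compliant" part of the commit in miniature. Images created with VK_SHARING_MODE_EXCLUSIVE must change queue-family ownership through a matched pair of barriers: a release barrier recorded on the source queue and an acquire barrier with the same family indices recorded on the destination queue. A self-contained sketch of the pair (single color subresource for brevity):

#include <vulkan/vulkan.h>

void release_then_acquire(VkCommandBuffer src_cb, VkCommandBuffer dst_cb,
                          VkImage image, VkImageLayout layout,
                          uint32_t src_family, uint32_t dst_family)
{
    VkImageMemoryBarrier barrier{ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
    barrier.oldLayout = layout;
    barrier.newLayout = layout;
    barrier.srcQueueFamilyIndex = src_family;
    barrier.dstQueueFamilyIndex = dst_family;
    barrier.image = image;
    barrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };

    // Release on the source queue: only srcAccessMask matters here.
    barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
    barrier.dstAccessMask = 0;
    vkCmdPipelineBarrier(src_cb, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0,
                         0, nullptr, 0, nullptr, 1, &barrier);

    // Acquire on the destination queue: only dstAccessMask matters here.
    barrier.srcAccessMask = 0;
    barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
    vkCmdPipelineBarrier(dst_cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
                         0, nullptr, 0, nullptr, 1, &barrier);
}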
@@ -935,8 +935,9 @@ namespace vk
             }
         }

+        const rsx::flags32_t create_flags = g_fxo->get<AsyncTaskScheduler>().is_host_mode() ? texture_create_flags::do_not_reuse : 0;
         auto section = create_new_texture(cmd, rsx_range, width, height, depth, mipmaps, pitch, gcm_format, context, type, swizzled,
-            rsx::component_order::default_, 0);
+            rsx::component_order::default_, create_flags);

         auto image = section->get_raw_texture();
         image->set_debug_name(fmt::format("Raw Texture @0x%x", rsx_range.start));
@@ -950,8 +951,12 @@ namespace vk
             input_swizzled = false;
         }

-        rsx::flags32_t upload_command_flags = initialize_image_layout |
-            (rsx::get_current_renderer()->get_backend_config().supports_asynchronous_compute ? upload_contents_async : upload_contents_inline);
+        rsx::flags32_t upload_command_flags = initialize_image_layout | upload_contents_inline;
+        if (context == rsx::texture_upload_context::shader_read &&
+            rsx::get_current_renderer()->get_backend_config().supports_asynchronous_compute)
+        {
+            upload_command_flags |= upload_contents_async;
+        }

         const u16 layer_count = (type == rsx::texture_dimension_extended::texture_dimension_cubemap) ? 6 : 1;
         vk::upload_image(cmd, image, subresource_layout, gcm_format, input_swizzled, layer_count, image->aspect(),
@@ -1086,11 +1091,40 @@ namespace vk
         {
             // Flush any pending async jobs in case of blockers
             // TODO: Context-level manager should handle this logic
-            g_fxo->get<async_scheduler_thread>().flush(VK_TRUE);
+            auto& async_scheduler = g_fxo->get<AsyncTaskScheduler>();
+            vk::semaphore* async_sema = nullptr;
+
+            if (async_scheduler.is_recording())
+            {
+                if (async_scheduler.is_host_mode())
+                {
+                    async_sema = async_scheduler.get_sema();
+                }
+                else
+                {
+                    vk::queue_submit_t submit_info{};
+                    async_scheduler.flush(submit_info, VK_TRUE);
+                }
+            }

             // Primary access command queue, must restart it after
             vk::fence submit_fence(*m_device);
-            cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, &submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE);
+            vk::queue_submit_t submit_info{ m_submit_queue, &submit_fence };
+
+            if (async_sema)
+            {
+                submit_info.queue_signal(*async_sema);
+            }
+
+            cmd.submit(submit_info, VK_TRUE);
+
+            if (async_sema)
+            {
+                vk::queue_submit_t submit_info2{};
+                submit_info2.wait_on(*async_sema, VK_PIPELINE_STAGE_TRANSFER_BIT);
+                async_scheduler.flush(submit_info2, VK_FALSE);
+            }

             vk::wait_for_fence(&submit_fence, GENERAL_WAIT_TIMEOUT);
@@ -1100,7 +1134,8 @@ namespace vk
         else
         {
             // Auxilliary command queue with auto-restart capability
-            cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_TRUE);
+            vk::queue_submit_t submit_info{ m_submit_queue, nullptr };
+            cmd.submit(submit_info, VK_TRUE);
         }

         ensure(cmd.flags == 0);
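Note the division of labor in that last flush path: the fence gives the host something to wait on for the primary submit, while the pool semaphore orders the async transfer submit behind it on the GPU timeline (the async flush itself carries no fence). A self-contained sketch of the combination, with illustrative names:

#include <cstdint>
#include <vulkan/vulkan.h>

void flush_with_async_handoff(VkDevice dev, VkQueue gfx, VkCommandBuffer primary,
                              VkQueue xfer, VkCommandBuffer async_cb,
                              VkSemaphore sema, VkFence fence)
{
    // Primary CB: signals 'sema' for the async queue and 'fence' for the CPU.
    VkSubmitInfo p{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
    p.commandBufferCount = 1;
    p.pCommandBuffers = &primary;
    p.signalSemaphoreCount = 1;
    p.pSignalSemaphores = &sema;
    vkQueueSubmit(gfx, 1, &p, fence);

    // Async CB: its transfers cannot start until the primary CB is done.
    VkPipelineStageFlags stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
    VkSubmitInfo a{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
    a.waitSemaphoreCount = 1;
    a.pWaitSemaphores = &sema;
    a.pWaitDstStageMask = &stage;
    a.commandBufferCount = 1;
    a.pCommandBuffers = &async_cb;
    vkQueueSubmit(xfer, 1, &a, VK_NULL_HANDLE);

    // Host side: block only until the primary CB retires; the async CB may
    // still be in flight, which is fine because its consumers wait on GPU sync.
    vkWaitForFences(dev, 1, &fence, VK_TRUE, UINT64_MAX);
}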
@@ -6,7 +6,7 @@
 namespace vk
 {
     // This queue flushing method to be implemented by the backend as behavior depends on config
-    void queue_submit(VkQueue queue, const VkSubmitInfo* info, fence* pfence, VkBool32 flush);
+    void queue_submit(const queue_submit_t& submit_info, VkBool32 flush);

     void command_pool::create(vk::render_device& dev, u32 queue_family_id)
     {
@@ -112,7 +112,7 @@ namespace vk
         is_open = false;
     }

-    void command_buffer::submit(VkQueue queue, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, fence* pfence, VkPipelineStageFlags pipeline_stage_flags, VkBool32 flush)
+    void command_buffer::submit(queue_submit_t& submit_info, VkBool32 flush)
     {
         if (is_open)
         {
@@ -123,31 +123,14 @@ namespace vk
         // Check for hanging queries to avoid driver hang
         ensure((flags & cb_has_open_query) == 0); // "close and submit of commandbuffer with a hanging query!"

-        if (!pfence)
+        if (!submit_info.fence)
         {
-            pfence = m_submit_fence;
-            is_pending = bool(pfence);
+            submit_info.fence = m_submit_fence;
+            is_pending = bool(submit_info.fence);
         }

-        VkSubmitInfo infos = {};
-        infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-        infos.commandBufferCount = 1;
-        infos.pCommandBuffers = &commands;
-        infos.pWaitDstStageMask = &pipeline_stage_flags;
-
-        if (wait_semaphore)
-        {
-            infos.waitSemaphoreCount = 1;
-            infos.pWaitSemaphores = &wait_semaphore;
-        }
-
-        if (signal_semaphore)
-        {
-            infos.signalSemaphoreCount = 1;
-            infos.pSignalSemaphores = &signal_semaphore;
-        }
-
-        queue_submit(queue, &infos, pfence, flush);
+        submit_info.commands = this->commands;
+        queue_submit(submit_info, flush);
         clear_flags();
     }
 }
@@ -25,6 +25,42 @@ namespace vk
         operator VkCommandPool() const;
     };

+    struct queue_submit_t
+    {
+        VkQueue queue = VK_NULL_HANDLE;
+        fence* fence = nullptr;
+        VkCommandBuffer commands = VK_NULL_HANDLE;
+        std::array<VkSemaphore, 4> wait_semaphores;
+        std::array<VkSemaphore, 4> signal_semaphores;
+        std::array<VkPipelineStageFlags, 4> wait_stages;
+        u32 wait_semaphores_count = 0;
+        u32 signal_semaphores_count = 0;
+
+        queue_submit_t() = default;
+        queue_submit_t(VkQueue queue_, vk::fence* fence_)
+            : queue(queue_), fence(fence_) {}
+
+        queue_submit_t(const queue_submit_t& other)
+        {
+            std::memcpy(this, &other, sizeof(queue_submit_t));
+        }
+
+        inline queue_submit_t& wait_on(VkSemaphore semaphore, VkPipelineStageFlags stage)
+        {
+            ensure(wait_semaphores_count < 4);
+            wait_semaphores[wait_semaphores_count] = semaphore;
+            wait_stages[wait_semaphores_count++] = stage;
+            return *this;
+        }
+
+        inline queue_submit_t& queue_signal(VkSemaphore semaphore)
+        {
+            ensure(signal_semaphores_count < 4);
+            signal_semaphores[signal_semaphores_count++] = semaphore;
+            return *this;
+        }
+    };
+
     class command_buffer
     {
     private:
@@ -64,7 +100,7 @@ namespace vk

         void begin();
         void end();
-        void submit(VkQueue queue, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, fence* pfence, VkPipelineStageFlags pipeline_stage_flags, VkBool32 flush = VK_FALSE);
+        void submit(queue_submit_t& submit_info, VkBool32 flush = VK_FALSE);

         // Properties
         command_pool& get_command_pool() const
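queue_submit_t replaces the old six-parameter submit() signature with a fluent builder that supports up to four wait and four signal semaphores per submit. A usage sketch (a fragment, not a standalone program; the queue, fence, semaphores and command buffer are assumed to exist):

vk::queue_submit_t submit_info{ transfer_queue, &submit_fence };
submit_info
    .wait_on(upload_ready_sema, VK_PIPELINE_STAGE_TRANSFER_BIT)
    .queue_signal(upload_done_sema);

// submit() fills submit_info.commands with its own VkCommandBuffer, then
// forwards the packet to vk::queue_submit(), which builds the VkSubmitInfo
// at the last moment (possibly on the MTRSX offload thread).
cmd.submit(submit_info, VK_TRUE);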
@@ -179,20 +179,25 @@ namespace vk
         return m_format_class;
     }

-    void image::push_layout(VkCommandBuffer cmd, VkImageLayout layout)
+    void image::push_layout(const command_buffer& cmd, VkImageLayout layout)
     {
+        ensure(current_queue_family == VK_QUEUE_FAMILY_IGNORED || current_queue_family == cmd.get_queue_family());
+
         m_layout_stack.push(current_layout);
         change_image_layout(cmd, this, layout);
     }

-    void image::push_barrier(VkCommandBuffer cmd, VkImageLayout layout)
+    void image::push_barrier(const command_buffer& cmd, VkImageLayout layout)
     {
+        ensure(current_queue_family == VK_QUEUE_FAMILY_IGNORED || current_queue_family == cmd.get_queue_family());
+
         m_layout_stack.push(current_layout);
         insert_texture_barrier(cmd, this, layout);
     }

-    void image::pop_layout(VkCommandBuffer cmd)
+    void image::pop_layout(const command_buffer& cmd)
     {
+        ensure(current_queue_family == VK_QUEUE_FAMILY_IGNORED || current_queue_family == cmd.get_queue_family());
         ensure(!m_layout_stack.empty());

         auto layout = m_layout_stack.top();
@@ -200,37 +205,48 @@ namespace vk
         change_image_layout(cmd, this, layout);
     }

+    void image::queue_acquire(const command_buffer& cmd, VkImageLayout new_layout)
+    {
+        ensure(m_layout_stack.empty());
+        ensure(current_queue_family != cmd.get_queue_family());
+        VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
+        change_image_layout(cmd, value, current_layout, new_layout, range, current_queue_family, cmd.get_queue_family(), 0u, ~0u);
+
+        current_layout = new_layout;
+        current_queue_family = cmd.get_queue_family();
+    }
+
+    void image::queue_release(const command_buffer& src_queue_cmd, u32 dst_queue_family, VkImageLayout new_layout)
+    {
+        ensure(current_queue_family == src_queue_cmd.get_queue_family());
+        ensure(m_layout_stack.empty());
+        VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
+        change_image_layout(src_queue_cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue_family, ~0u, 0u);
+    }
+
     void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout)
     {
-        if (current_layout == new_layout)
+        // This is implicitly an acquire op
+        if (const auto new_queue_family = cmd.get_queue_family();
+            current_queue_family == VK_QUEUE_FAMILY_IGNORED)
+        {
+            current_queue_family = new_queue_family;
+        }
+        else if (current_queue_family != new_queue_family)
+        {
+            queue_acquire(cmd, new_layout);
             return;
+        }
+
+        if (current_layout == new_layout)
+        {
+            return;
+        }

         ensure(m_layout_stack.empty());
         change_image_layout(cmd, this, new_layout);
-    }

-    void image::change_layout(const command_buffer& cmd, VkImageLayout new_layout, u32 new_queue_family)
-    {
-        if (current_layout == new_layout && current_queue_family == new_queue_family)
-        {
-            // Nothing to do
-            return;
-        }
-
-        ensure(m_layout_stack.empty());
-        u32 dst_queue = new_queue_family;
-
-        if (current_queue_family == VK_QUEUE_FAMILY_IGNORED)
-        {
-            // Implicit acquisition
-            dst_queue = VK_QUEUE_FAMILY_IGNORED;
-        }
-
-        VkImageSubresourceRange range = { aspect(), 0, mipmaps(), 0, layers() };
-        change_image_layout(cmd, value, current_layout, new_layout, range, current_queue_family, dst_queue);
-
-        current_layout = new_layout;
-        current_queue_family = new_queue_family;
+        current_queue_family = cmd.get_queue_family();
     }

     void image::set_debug_name(const std::string& name)
@@ -77,11 +77,14 @@ namespace vk
         rsx::format_class format_class() const;

         // Pipeline management
-        void push_layout(VkCommandBuffer cmd, VkImageLayout layout);
-        void push_barrier(VkCommandBuffer cmd, VkImageLayout layout);
-        void pop_layout(VkCommandBuffer cmd);
+        void push_layout(const command_buffer& cmd, VkImageLayout layout);
+        void push_barrier(const command_buffer& cmd, VkImageLayout layout);
+        void pop_layout(const command_buffer& cmd);
         void change_layout(const command_buffer& cmd, VkImageLayout new_layout);
-        void change_layout(const command_buffer& cmd, VkImageLayout new_layout, u32 new_queue_family);
+
+        // Queue transfer
+        void queue_acquire(const command_buffer& cmd, VkImageLayout new_layout);
+        void queue_release(const command_buffer& src_queue_cmd, u32 dst_queue_family, VkImageLayout new_layout);

         // Debug utils
         void set_debug_name(const std::string& name);
@@ -56,7 +56,7 @@ namespace vk
     }

     void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range,
-        u32 src_queue_family, u32 dst_queue_family)
+        u32 src_queue_family, u32 dst_queue_family, u32 src_access_mask_bits, u32 dst_access_mask_bits)
     {
         if (vk::is_renderpass_open(cmd))
         {
@@ -196,6 +196,12 @@ namespace vk
             break; //TODO Investigate what happens here
         }

+        barrier.srcAccessMask &= src_access_mask_bits;
+        barrier.dstAccessMask &= dst_access_mask_bits;
+
+        if (!barrier.srcAccessMask) src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+        if (!barrier.dstAccessMask) dst_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+
         vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
     }

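The new mask parameters exist for the ownership-transfer halves defined in image.cpp above: queue_release() passes (~0u, 0u) and queue_acquire() passes (0u, ~0u), matching the spec rule that a release barrier performs only an availability operation (its dstAccessMask is ignored on the source queue) and an acquire barrier only a visibility operation (its srcAccessMask is ignored on the destination queue). A small helper mirroring the clamp, with illustrative names:

#include <vulkan/vulkan.h>

// Clamp one half of a queue-family ownership transfer: a zeroed access mask
// also relaxes the matching stage, since no memory dependency remains on
// that side of the barrier.
void apply_qfot_masks(VkImageMemoryBarrier& barrier,
                      VkPipelineStageFlags& src_stage, VkPipelineStageFlags& dst_stage,
                      uint32_t src_mask_bits, uint32_t dst_mask_bits)
{
    barrier.srcAccessMask &= src_mask_bits;
    barrier.dstAccessMask &= dst_mask_bits;

    if (!barrier.srcAccessMask) src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
    if (!barrier.dstAccessMask) dst_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}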
@@ -10,7 +10,9 @@ namespace vk
     VkComponentMapping apply_swizzle_remap(const std::array<VkComponentSwizzle, 4>& base_remap, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector);

     void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range,
-        u32 src_queue_family = VK_QUEUE_FAMILY_IGNORED, u32 dst_queue_family = VK_QUEUE_FAMILY_IGNORED);
+        u32 src_queue_family = VK_QUEUE_FAMILY_IGNORED, u32 dst_queue_family = VK_QUEUE_FAMILY_IGNORED,
+        u32 src_access_mask_bits = 0xFFFFFFFF, u32 dst_access_mask_bits = 0xFFFFFFFF);

     void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout, const VkImageSubresourceRange& range);
     void change_image_layout(VkCommandBuffer cmd, vk::image* image, VkImageLayout new_layout);
 }
@@ -53,9 +53,27 @@ namespace vk
         return (handle != VK_NULL_HANDLE);
     }

-    event::event(const render_device& dev, sync_domain domain)
+    semaphore::semaphore(const render_device& dev)
+        : m_device(dev)
+    {
+        VkSemaphoreCreateInfo info{};
+        info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+        CHECK_RESULT(vkCreateSemaphore(m_device, &info, nullptr, &m_handle));
+    }
+
+    semaphore::~semaphore()
+    {
+        vkDestroySemaphore(m_device, m_handle, nullptr);
+    }
+
+    semaphore::operator VkSemaphore() const
+    {
+        return m_handle;
+    }
+
+    event::event(const render_device& dev, sync_domain domain)
+        : m_device(dev)
     {
-        m_device = dev;
         if (domain == sync_domain::gpu || dev.gpu().get_driver_vendor() != driver_vendor::AMD)
         {
             VkEventCreateInfo info
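The new wrapper owns exactly one VkSemaphore and deletes its copy constructor (see the header hunks below), because two copies would both call vkDestroySemaphore on the same handle. A design-note sketch of the same guarantee with move semantics added, assuming a valid VkDevice (the class below is illustrative, not the RPCS3 one):

#include <utility>
#include <vulkan/vulkan.h>

class unique_semaphore
{
    VkDevice m_dev = VK_NULL_HANDLE;
    VkSemaphore m_handle = VK_NULL_HANDLE;

public:
    explicit unique_semaphore(VkDevice dev) : m_dev(dev)
    {
        VkSemaphoreCreateInfo info{ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
        vkCreateSemaphore(m_dev, &info, nullptr, &m_handle);
    }

    // Copying is deleted: two owners would double-destroy one handle.
    unique_semaphore(const unique_semaphore&) = delete;
    unique_semaphore& operator=(const unique_semaphore&) = delete;

    // Moving transfers ownership instead.
    unique_semaphore(unique_semaphore&& other) noexcept
        : m_dev(other.m_dev), m_handle(std::exchange(other.m_handle, VK_NULL_HANDLE)) {}

    ~unique_semaphore()
    {
        if (m_handle) vkDestroySemaphore(m_dev, m_handle, nullptr);
    }

    operator VkSemaphore() const { return m_handle; }
};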
@@ -24,6 +24,7 @@ namespace vk

         fence(VkDevice dev);
         ~fence();
+        fence(const fence&) = delete;

         void reset();
         void signal_flushed();
@@ -43,6 +44,7 @@ namespace vk
     public:
         event(const render_device& dev, sync_domain domain);
         ~event();
+        event(const event&) = delete;

         void signal(const command_buffer& cmd, VkPipelineStageFlags stages, VkAccessFlags access);
         void host_signal() const;
@@ -51,6 +53,21 @@ namespace vk
         void reset() const;
     };

+    class semaphore
+    {
+        VkSemaphore m_handle = VK_NULL_HANDLE;
+        VkDevice m_device = VK_NULL_HANDLE;
+
+        semaphore() = default;
+
+    public:
+        semaphore(const render_device& dev);
+        ~semaphore();
+        semaphore(const semaphore&) = delete;
+
+        operator VkSemaphore() const;
+    };
+
     VkResult wait_for_fence(fence* pFence, u64 timeout = 0ull);
     VkResult wait_for_event(event* pEvent, u64 timeout = 0ull);
 }
@@ -168,7 +168,7 @@ struct cfg_root : cfg::node
         cfg::_bool asynchronous_texture_streaming{ this, "Asynchronous Texture Streaming 2", false };
         cfg::_bool fsr_upscaling{ this, "Enable FidelityFX Super Resolution Upscaling", false, true };
         cfg::uint<0, 100> rcas_sharpening_intensity{ this, "FidelityFX CAS Sharpening Intensity", 50, true };
-        cfg::_enum<vk_gpu_scheduler_mode> asynchronous_scheduler{ this, "Asynchronous Queue Scheduler", vk_gpu_scheduler_mode::device };
+        cfg::_enum<vk_gpu_scheduler_mode> asynchronous_scheduler{ this, "Asynchronous Queue Scheduler", vk_gpu_scheduler_mode::safe };

     } vk{ this };
@@ -521,8 +521,8 @@ void fmt_class_string<vk_gpu_scheduler_mode>::format(std::string& out, u64 arg)
 {
     switch (value)
     {
-    case vk_gpu_scheduler_mode::host: return "Host";
-    case vk_gpu_scheduler_mode::device: return "Device";
+    case vk_gpu_scheduler_mode::safe: return "Safe";
+    case vk_gpu_scheduler_mode::fast: return "Fast";
     }

     return unknown;
@@ -228,8 +228,8 @@ enum class shader_mode

 enum class vk_gpu_scheduler_mode
 {
-    host,
-    device
+    safe,
+    fast
 };

 enum class thread_scheduler_mode