rsx: Synchronization improvements

- Always flush the primary queue and wait if not invoking readback from the rsx thread
-- Should fix some instances of device_lost when using WCB
-- Marked remaining case as TODO
-- TODO: optimize the amount of time rsx waits for external threads trying to read
kd-11 2018-01-15 22:28:25 +03:00
parent cbc8bf01a1
commit 9ec2337192
3 changed files with 154 additions and 127 deletions
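The core of the change is the new flush_request_task handshake: a non-RSX thread that faults on protected memory posts a flush request and spins in producer_wait() until the RSX thread has submitted and flushed its primary command buffer, while the RSX thread, after servicing the request in do_local_task(), spins in consumer_wait() until every waiter has dropped its reference. Below is a minimal standalone sketch of that protocol; the thread bodies, main() and the yield-based spinning are illustrative placeholders, not RPCS3 code (the real task also carries a hard_sync flag, uses _mm_pause loops, and is posted under m_flush_queue_mutex).

// Standalone model of the request/acknowledge handshake added by this commit.
// Only the protocol is reproduced; names mirror the diff, everything else is a placeholder.
#include <atomic>
#include <cstdio>
#include <thread>

struct flush_request_task
{
    std::atomic<bool> pending_state{ false }; // set by the faulting thread, cleared by the RSX thread
    std::atomic<int>  num_waiters{ 0 };       // faulting threads still reading back flushed data

    void post()               { pending_state = true; ++num_waiters; }
    bool pending() const      { return pending_state.load(); }
    void clear_pending_flag() { pending_state = false; }
    void remove_one()         { --num_waiters; }

    // Faulting thread: block until the RSX thread has flushed the command queue
    void producer_wait() const { while (pending_state.load()) std::this_thread::yield(); }
    // RSX thread: block until every waiter has finished its readback
    void consumer_wait() const { while (num_waiters.load() != 0) std::this_thread::yield(); }
};

int main()
{
    flush_request_task flush_requests;
    std::atomic<bool> stop{ false };

    // Stand-in for the RSX thread's do_local_task() loop
    std::thread rsx([&]
    {
        while (!stop.load())
        {
            if (flush_requests.pending())
            {
                std::puts("rsx: flush_command_queue()");
                flush_requests.clear_pending_flag(); // releases producer_wait()
                flush_requests.consumer_wait();      // wait for readers before touching buffers again
            }
            std::this_thread::yield();
        }
    });

    // Stand-in for a non-RSX thread handling on_access_violation()
    flush_requests.post();
    flush_requests.producer_wait();
    std::puts("reader: command queue flushed, performing readback");
    flush_requests.remove_one();

    stop = true;
    rsx.join();
}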


@@ -811,7 +811,13 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
if (!is_rsxthr)
{
//Always submit primary cb to ensure state consistency (flush pending changes such as image transitions)
vm::temporary_unlock();
std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
m_flush_requests.post(sync_timestamp == 0ull);
has_queue_ref = true;
}
else
{
@@ -821,67 +827,36 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
if (sync_timestamp > 0)
{
//Wait for any cb submitted after the sync timestamp to finish
while (true)
//Wait for earliest cb submitted after the sync timestamp to finish
command_buffer_chunk *target_cb = nullptr;
for (auto &cb : m_primary_cb_list)
{
u32 pending = 0;
if (m_last_flushable_cb < 0)
break;
for (auto &cb : m_primary_cb_list)
if (cb.pending && cb.last_sync >= sync_timestamp)
{
if (!cb.pending && cb.last_sync >= sync_timestamp)
{
pending = 0;
break;
}
if (cb.pending)
{
pending++;
if (is_rsxthr)
cb.poke();
}
if (target_cb == nullptr || target_cb->last_sync > cb.last_sync)
target_cb = &cb;
}
if (!pending)
break;
std::this_thread::yield();
}
if (target_cb)
target_cb->wait();
if (is_rsxthr)
m_last_flushable_cb = -1;
}
else
if (has_queue_ref)
{
if (!is_rsxthr)
{
{
std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
m_flush_commands = true;
m_queued_threads++;
}
//Wait for the RSX thread to process
while (m_flush_commands)
{
_mm_lfence();
_mm_pause();
}
has_queue_ref = true;
}
//Wait for the RSX thread to process request if it hasn't already
m_flush_requests.producer_wait();
}
m_texture_cache.flush_all(result, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
if (has_queue_ref)
{
m_queued_threads--;
//Release RSX thread
m_flush_requests.remove_one();
}
}
@@ -1855,7 +1830,7 @@ void VKGSRender::flush_command_queue(bool hard_sync)
}
m_last_flushable_cb = -1;
m_flush_commands = false;
m_flush_requests.clear_pending_flag();
}
else
{
@@ -2037,15 +2012,7 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
void VKGSRender::do_local_task(bool idle)
{
//TODO: Guard this
if (m_overlay_cleanup_requests.size())
{
flush_command_queue(true);
m_ui_renderer->remove_temp_resources();
m_overlay_cleanup_requests.clear();
}
if (m_flush_commands)
if (m_flush_requests.pending())
{
std::lock_guard<std::mutex> lock(m_flush_queue_mutex);
@@ -2053,12 +2020,8 @@ void VKGSRender::do_local_task(bool idle)
//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
flush_command_queue();
m_flush_commands = false;
while (m_queued_threads)
{
_mm_lfence();
_mm_pause();
}
m_flush_requests.clear_pending_flag();
m_flush_requests.consumer_wait();
}
if (m_last_flushable_cb > -1)
@@ -2151,7 +2114,14 @@ void VKGSRender::do_local_task(bool idle)
#endif
if (m_custom_ui)
//TODO: Guard this
if (m_overlay_cleanup_requests.size())
{
flush_command_queue(true);
m_ui_renderer->remove_temp_resources();
m_overlay_cleanup_requests.clear();
}
else if (m_custom_ui)
{
if (!in_begin_end && native_ui_flip_request.load())
{


@@ -43,6 +43,7 @@ struct command_buffer_chunk: public vk::command_buffer
std::atomic_bool pending = { false };
std::atomic<u64> last_sync = { 0 };
std::mutex guard_mutex;
command_buffer_chunk()
{}
@@ -84,8 +85,13 @@ struct command_buffer_chunk: public vk::command_buffer
{
if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
{
vkResetFences(m_device, 1, &submit_fence);
pending = false;
std::lock_guard<std::mutex> lock(guard_mutex);
if (pending)
{
vkResetFences(m_device, 1, &submit_fence);
pending = false;
}
}
return !pending;
@@ -93,6 +99,8 @@ struct command_buffer_chunk: public vk::command_buffer
void wait()
{
std::lock_guard<std::mutex> lock(guard_mutex);
if (!pending)
return;
@@ -116,6 +124,114 @@ struct occlusion_data
command_buffer_chunk* command_buffer_to_wait = nullptr;
};
struct frame_context_t
{
VkSemaphore present_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;
std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
std::vector<std::unique_ptr<vk::sampler>> samplers_to_clean;
u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
//Heap pointers
s64 attrib_heap_ptr = 0;
s64 ubo_heap_ptr = 0;
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
u64 last_frame_sync_time = 0;
//Copy shareable information
void grab_resources(frame_context_t &other)
{
present_semaphore = other.present_semaphore;
descriptor_set = other.descriptor_set;
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
attrib_heap_ptr = other.attrib_heap_ptr;
ubo_heap_ptr = other.attrib_heap_ptr;
index_heap_ptr = other.attrib_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
}
//Exchange storage (non-copyable)
void swap_storage(frame_context_t &other)
{
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
std::swap(samplers_to_clean, other.samplers_to_clean);
}
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc)
{
attrib_heap_ptr = attrib_loc;
ubo_heap_ptr = ubo_loc;
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
last_frame_sync_time = get_system_time();
}
void reset_heap_ptrs()
{
last_frame_sync_time = 0;
}
};
struct flush_request_task
{
atomic_t<bool> pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request
atomic_t<int> num_waiters{ 0 }; //Number of threads waiting for this request to be serviced
bool hard_sync = false;
flush_request_task(){}
void post(bool _hard_sync)
{
hard_sync = (hard_sync || _hard_sync);
pending_state = true;
num_waiters++;
}
void remove_one()
{
num_waiters--;
}
void clear_pending_flag()
{
hard_sync = false;
pending_state.store(false);
}
bool pending() const
{
return pending_state.load();
}
void consumer_wait() const
{
while (num_waiters.load() != 0)
{
_mm_lfence();
_mm_pause();
}
}
void producer_wait() const
{
while (pending_state.load())
{
_mm_lfence();
_mm_pause();
}
}
};
class VKGSRender : public GSRender
{
private:
@@ -191,64 +307,6 @@ private:
vk::vk_data_heap m_index_buffer_ring_info;
vk::vk_data_heap m_texture_upload_buffer_ring_info;
struct frame_context_t
{
VkSemaphore present_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
u32 used_descriptors = 0;
std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
std::vector<std::unique_ptr<vk::sampler>> samplers_to_clean;
u32 present_image = UINT32_MAX;
command_buffer_chunk* swap_command_buffer = nullptr;
//Heap pointers
s64 attrib_heap_ptr = 0;
s64 ubo_heap_ptr = 0;
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
u64 last_frame_sync_time = 0;
//Copy shareable information
void grab_resources(frame_context_t &other)
{
present_semaphore = other.present_semaphore;
descriptor_set = other.descriptor_set;
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
attrib_heap_ptr = other.attrib_heap_ptr;
ubo_heap_ptr = other.attrib_heap_ptr;
index_heap_ptr = other.attrib_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
}
//Exchange storage (non-copyable)
void swap_storage(frame_context_t &other)
{
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
std::swap(samplers_to_clean, other.samplers_to_clean);
}
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc)
{
attrib_heap_ptr = attrib_loc;
ubo_heap_ptr = ubo_loc;
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
last_frame_sync_time = get_system_time();
}
void reset_heap_ptrs()
{
last_frame_sync_time = 0;
}
};
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage
frame_context_t m_aux_frame_context;
@@ -277,8 +335,7 @@ private:
std::atomic<int> m_last_flushable_cb = {-1 };
std::mutex m_flush_queue_mutex;
std::atomic<bool> m_flush_commands = { false };
std::atomic<int> m_queued_threads = { 0 };
flush_request_task m_flush_requests;
std::thread::id rsx_thread;
std::atomic<u64> m_last_sync_event = { 0 };


@@ -13,8 +13,8 @@ namespace vk
VkSampler g_null_sampler = nullptr;
bool g_cb_no_interrupt_flag = false;
bool g_drv_no_primitive_restart_flag = false;
atomic_t<bool> g_cb_no_interrupt_flag { false };
atomic_t<bool> g_drv_no_primitive_restart_flag { false };
u64 g_num_processed_frames = 0;
u64 g_num_total_frames = 0;