mirror of https://github.com/RPCS3/rpcs3.git
synced 2025-08-12 02:59:51 +00:00
vk: Buffer sync timing tweaks
vulkan: more sync timing fixes
This commit is contained in:
parent e1a75deb25
commit 3b27b3c182

2 changed files with 73 additions and 37 deletions
@@ -572,6 +572,8 @@ VKGSRender::~VKGSRender()
 		return;
 	}
 
+	m_current_command_buffer->reset();
+
 	//Wait for queue
 	vkQueueWaitIdle(m_swap_chain->get_present_queue());
 
@@ -642,29 +644,46 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 		return m_texture_cache.invalidate_address(address);
 	else
 	{
-		if (!m_texture_cache.address_is_flushable(address))
+		bool flushable, synchronized;
+		std::tie(flushable, synchronized) = m_texture_cache.address_is_flushable(address);
+
+		if (!flushable)
 			return false;
 
-		if (m_last_flushable_cb >= 0)
+		if (synchronized)
 		{
-			if (m_primary_cb_list[m_last_flushable_cb].pending)
-				m_primary_cb_list[m_last_flushable_cb].wait();
-		}
+			if (m_last_flushable_cb >= 0)
+			{
+				if (m_primary_cb_list[m_last_flushable_cb].pending)
+					m_primary_cb_list[m_last_flushable_cb].wait();
+			}
 
-		if (std::this_thread::get_id() != rsx_thread)
+			m_last_flushable_cb = -1;
+		}
+		else
 		{
-			//TODO: Guard this when the renderer is flushing the command queue, might deadlock otherwise
-			m_flush_commands = true;
-			m_queued_threads++;
+			//This region is buffered, but no previous sync point has been put in place to start sync efforts
+			//Just stall and get what we have at this point
+			if (std::this_thread::get_id() != rsx_thread)
+			{
+				//TODO: Guard this when the renderer is flushing the command queue, might deadlock otherwise
+				m_flush_commands = true;
+				m_queued_threads++;
 
-			//This is awful!
-			while (m_flush_commands);
+				//This is awful!
+				while (m_flush_commands);
 
-			std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
-			bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+				std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+				bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
 
-			m_queued_threads--;
-			return status;
+				m_queued_threads--;
+				return status;
+			}
+			else
+			{
+				//NOTE: If the rsx::thread is trampling its own data, we have an operation that should be moved to the GPU
+				flush_command_queue();
+			}
 		}
 
 		std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
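The heart of this hunk is a cross-thread handshake: a faulting non-RSX thread cannot drive the Vulkan queue itself, so it raises m_flush_commands and spins until the RSX thread services the request from do_local_task() (see the last hunk of this file). Below is a minimal, self-contained sketch of that pattern; the std::atomic stand-ins are invented here for illustration, while the real members are plain fields relying on the renderer's own ordering guarantees.

    #include <atomic>
    #include <thread>

    std::atomic<bool> flush_requested{false};  // cf. m_flush_commands
    std::atomic<int>  waiting_threads{0};      // cf. m_queued_threads

    // Faulting (non-RSX) thread: request a flush, then spin until it is serviced.
    void wait_for_flush()
    {
        waiting_threads++;
        flush_requested = true;

        while (flush_requested)          // "This is awful!" -- a busy-wait, not a condvar
            std::this_thread::yield();

        // ...flush_address(...) happens here under m_secondary_cb_guard in the real code...
        waiting_threads--;
    }

    // RSX thread, polled from its main loop (cf. do_local_task):
    void service_flush_requests()
    {
        if (!flush_requested)
            return;

        // flush_command_queue() would run here: submit and sync the GPU work
        flush_requested = false;         // release the spinning threads

        while (waiting_threads)          // let them finish flush_address() before recording more work
            std::this_thread::yield();
    }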
@@ -721,7 +740,6 @@ void VKGSRender::begin()
 	std::chrono::time_point<steady_clock> stop = steady_clock::now();
 	m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
 
-	m_draw_calls++;
 	m_used_descriptors++;
 }
 
@@ -826,6 +844,13 @@ void VKGSRender::end()
 	std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
 	m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
 
+	//upload_vertex_data is a memory op and can trigger an access violation
+	//render passes are supposed to be uninterruptible, so we have to finish everything first before we start the render pass
+	auto upload_info = upload_vertex_data();
+
+	std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
+	m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
+
 	VkRenderPassBeginInfo rp_begin = {};
 	rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
 	rp_begin.renderPass = current_render_pass;
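The block added here (and deleted again in the next hunk) enforces a Vulkan rule: vkCmdBeginRenderPass/vkCmdEndRenderPass must bracket an uninterrupted stretch of recording, and upload_vertex_data() touches guest memory, so it can fault and force a cache flush that submits the command buffer. Doing the upload first keeps any such submit outside the render pass. A sketch of the required ordering follows; record_draw is a placeholder name, not part of the diff.

    #include <vulkan/vulkan.h>

    // Hypothetical helper showing the ordering constraint.
    void record_draw(VkCommandBuffer cmd, const VkRenderPassBeginInfo& rp_begin, VkPipeline pipeline)
    {
        // 1. All guest-memory work first: a fault here may trigger a flush that
        //    submits `cmd`, which would be illegal inside a render pass.
        // auto upload_info = upload_vertex_data();

        // 2. Only then open the render pass; nothing below may cause a submit.
        vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        // ...vkCmdDraw* calls...
        vkCmdEndRenderPass(cmd);
    }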
@@ -836,12 +861,6 @@ void VKGSRender::end()
 	rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height();
 
 	vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
 
-	auto upload_info = upload_vertex_data();
-
-	std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
-	m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
-
-
 	vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
 	vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
@@ -867,6 +886,7 @@ void VKGSRender::end()
 	m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
 
 	copy_render_targets_to_dma_location();
+	m_draw_calls++;
 
 	rsx::thread::end();
 }
@@ -928,6 +948,7 @@ void VKGSRender::clear_surface(u32 mask)
 	if (m_current_present_image == 0xFFFF) return;
 
 	init_buffers();
+	copy_render_targets_to_dma_location();
 
 	float depth_clear = 1.f;
 	u32 stencil_clear = 0;
@@ -1070,15 +1091,19 @@ void VKGSRender::flush_command_queue(bool hard_sync)
 
 	if (hard_sync)
 	{
-		m_current_command_buffer->pending = true;
-		m_current_command_buffer->wait();
-
 		//swap handler checks the pending flag, so call it here
 		process_swap_request();
 
+		//wait for the latest instruction to execute
+		m_current_command_buffer->pending = true;
+		m_current_command_buffer->wait();
+
 		//Clear all command buffer statuses
 		for (auto &cb : m_primary_cb_list)
 			cb.poke();
+
+		m_last_flushable_cb = -1;
+		m_flush_commands = false;
 	}
 	else
 	{
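Note the reordering inside hard_sync: the swap request is processed first (its handler checks the pending flag), and only afterwards is the current buffer marked pending and waited on; the poke() loop then retires every other buffer that has already finished. A sketch of what a fence-backed wait()/poke() pair might look like, assuming the wrapper owns a VkFence submitted with the buffer — RPCS3's actual command-buffer class is not shown in this diff:

    #include <vulkan/vulkan.h>
    #include <cstdint>

    // Hypothetical wrapper; the real type differs.
    struct tracked_command_buffer
    {
        VkDevice device  = VK_NULL_HANDLE;
        VkFence  fence   = VK_NULL_HANDLE; // signaled when the GPU finishes this buffer
        bool     pending = false;          // set on submit, cleared once completion is observed

        // Blocking wait, cf. m_current_command_buffer->wait() above.
        void wait()
        {
            if (!pending)
                return;

            vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
            vkResetFences(device, 1, &fence);
            pending = false;
        }

        // Non-blocking probe, cf. the cb.poke() loop above.
        void poke()
        {
            if (pending && vkGetFenceStatus(device, fence) == VK_SUCCESS)
            {
                vkResetFences(device, 1, &fence);
                pending = false;
            }
        }
    };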
@@ -1135,8 +1160,6 @@ void VKGSRender::process_swap_request()
 		present.pImageIndices = &m_current_present_image;
 		CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
 	}
-	else
-		fmt::throw_exception("How can a process be set without a pending flag?");
 
 	//Clean up all the resources from the last frame
 
@@ -1162,14 +1185,11 @@ void VKGSRender::do_local_task()
 {
 	if (m_flush_commands)
 	{
-		//WARNING: This is a hard sync, expect horrendous performance
-		//Need to process this a little better!
-		//TODO: Link cb with draw buffer and wait for that specific cb based on address
-		LOG_ERROR(RSX, "Hard sync point is to be processed. Performance warning");
-		flush_command_queue(true);
+		//TODO: Determine if a hard sync is necessary
+		//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
+		flush_command_queue();
 
 		m_flush_commands = false;
-		m_flush_draw_buffers = false;
 		while (m_queued_threads);
 	}
 }
@@ -21,6 +21,7 @@ namespace vk
 		//DMA relevant data
 		u16 native_pitch;
 		VkFence dma_fence = VK_NULL_HANDLE;
+		bool synchronized = false;
 		vk::render_device* m_device = nullptr;
 		vk::image *vram_texture = nullptr;
 		std::unique_ptr<vk::buffer> dma_buffer;
@@ -52,6 +53,10 @@ namespace vk
 			//TODO: Properly compute these values
 			this->native_pitch = native_pitch;
 			pitch = cpu_address_range / height;
+
+			//Even if we are managing the same vram section, we cannot guarantee contents are static
+			//The create method is only invoked when a new managed session is required
+			synchronized = false;
 		}
 
 		void release_dma_resources()
@@ -193,6 +198,8 @@ namespace vk
 				CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
 				CHECK_RESULT(vkResetFences(*m_device, 1, &dma_fence));
 			}
+
+			synchronized = true;
 		}
 
 		template<typename T>
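The hunks above and below define the whole lifecycle of the new synchronized flag: create() clears it because a new managed session cannot trust old contents, copy_texture() sets it once the DMA copy has been submitted and its fence handled, and the cache-miss path checks it instead of probing the DMA handles. A condensed model of that state machine — a sketch, not the real cached_texture_section, which also owns dma_fence, dma_buffer and the vram texture:

    // Minimal model of the section's sync state.
    struct section_sync_state
    {
        bool synchronized = false;

        void create()       { synchronized = false; } // new managed session, contents unknown
        void copy_texture() { synchronized = true;  } // DMA copy submitted and fenced
        bool is_synchronized() const { return synchronized; }
    };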
@@ -217,7 +224,7 @@ namespace vk
 			if (m_device == nullptr)
 				m_device = &dev;
 
-			if (dma_fence == VK_NULL_HANDLE || dma_buffer.get() == nullptr)
+			if (!synchronized)
 			{
 				LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
 				copy_texture(cmd, heap_index, submit_queue, true, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
@@ -263,6 +270,11 @@ namespace vk
 			dma_buffer->unmap();
 			//It's highly likely that this surface will be reused, so we just leave resources in place
 		}
+
+		bool is_synchronized() const
+		{
+			return synchronized;
+		}
 	};
 
 	class texture_cache
@@ -538,18 +550,22 @@ namespace vk
 			region->copy_texture(cmd, memory_types.host_visible_coherent, submit_queue);
 		}
 
-		bool address_is_flushable(u32 address)
+		std::tuple<bool, bool> address_is_flushable(u32 address)
 		{
+			if (address < texture_cache_range.first ||
+				address > texture_cache_range.second)
+				return std::make_tuple(false, false);
+
 			for (auto &tex : m_cache)
 			{
 				if (tex.is_dirty()) continue;
 				if (!tex.is_flushable()) continue;
 
 				if (tex.overlaps(address))
-					return true;
+					return std::make_tuple(true, tex.is_synchronized());
 			}
 
-			return false;
+			return std::make_tuple(false, false);
 		}
 
 		bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
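With the new signature, a single walk over the cache answers both questions: is this address ours at all, and has a sync point already been recorded for it? Callers unpack the pair with std::tie, as the on_access_violation hunk does; a minimal usage sketch, where the free function is a stand-in for the cache member:

    #include <tuple>

    // Stand-in for texture_cache::address_is_flushable; always reports "not flushable".
    std::tuple<bool, bool> address_is_flushable(unsigned int /*address*/)
    {
        return std::make_tuple(false, false);
    }

    void example(unsigned int fault_address)
    {
        bool flushable, synchronized;
        std::tie(flushable, synchronized) = address_is_flushable(fault_address);

        // Under C++17 a structured binding would read:
        //   auto [flushable, synchronized] = address_is_flushable(fault_address);
    }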