From 3b27b3c182fc0c23acd2609785fb284a1f5f372f Mon Sep 17 00:00:00 2001
From: kd-11
Date: Sun, 23 Apr 2017 15:00:38 +0300
Subject: [PATCH] vk: Buffer sync timing tweaks

vulkan: more sync timing fixes
---
 rpcs3/Emu/RSX/VK/VKGSRender.cpp   | 86 +++++++++++++++++++------------
 rpcs3/Emu/RSX/VK/VKTextureCache.h | 24 +++++++--
 2 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index a3f7c7d1d5..3b9ef001bb 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -572,6 +572,8 @@ VKGSRender::~VKGSRender()
 		return;
 	}
 
+	m_current_command_buffer->reset();
+
 	//Wait for queue
 	vkQueueWaitIdle(m_swap_chain->get_present_queue());
 
@@ -642,29 +644,46 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 		return m_texture_cache.invalidate_address(address);
 	else
 	{
-		if (!m_texture_cache.address_is_flushable(address))
+		bool flushable, synchronized;
+
+		std::tie(flushable, synchronized) = m_texture_cache.address_is_flushable(address);
+		if (!flushable)
 			return false;
 
-		if (m_last_flushable_cb >= 0)
+		if (synchronized)
 		{
-			if (m_primary_cb_list[m_last_flushable_cb].pending)
-				m_primary_cb_list[m_last_flushable_cb].wait();
+			if (m_last_flushable_cb >= 0)
+			{
+				if (m_primary_cb_list[m_last_flushable_cb].pending)
+					m_primary_cb_list[m_last_flushable_cb].wait();
+			}
+
+			m_last_flushable_cb = -1;
 		}
-
-		if (std::this_thread::get_id() != rsx_thread)
+		else
 		{
-			//TODO: Guard this when the renderer is flushing the command queue, might deadlock otherwise
-			m_flush_commands = true;
-			m_queued_threads++;
+			//This region is buffered, but no previous sync point has been put in place to start sync efforts
+			//Just stall and get what we have at this point
+			if (std::this_thread::get_id() != rsx_thread)
+			{
+				//TODO: Guard this when the renderer is flushing the command queue, might deadlock otherwise
+				m_flush_commands = true;
+				m_queued_threads++;
 
-			//This is awful!
-			while (m_flush_commands);
+				//This is awful!
+				while (m_flush_commands);
 
-			std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
-			bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
+				std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
+				bool status = m_texture_cache.flush_address(address, *m_device, m_secondary_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
 
-			m_queued_threads--;
-			return status;
+				m_queued_threads--;
+				return status;
+			}
+			else
+			{
+				//NOTE: If the rsx::thread is trampling its own data, we have an operation that should be moved to the GPU
+				flush_command_queue();
+			}
 		}
 
 		std::lock_guard<std::mutex> lock(m_secondary_cb_guard);
@@ -721,7 +740,6 @@ void VKGSRender::begin()
 
 	std::chrono::time_point<steady_clock> stop = steady_clock::now();
 	m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
 
-	m_draw_calls++;
 	m_used_descriptors++;
 }
@@ -826,6 +844,13 @@ void VKGSRender::end()
 	std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
 	m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
 
+	//upload_vertex_data is a memory op and can trigger an access violation
+	//render passes are supposed to be uninterruptible, so we have to finish everything first before we start the render pass
+	auto upload_info = upload_vertex_data();
+
+	std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
+	m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
+
 	VkRenderPassBeginInfo rp_begin = {};
 	rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
 	rp_begin.renderPass = current_render_pass;
@@ -836,12 +861,6 @@
 	rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height();
 
 	vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
-
-	auto upload_info = upload_vertex_data();
-
-	std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
-	m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
-
 	vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
 	vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &descriptor_sets, 0, nullptr);
 
@@ -867,6 +886,7 @@
 	m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
 
 	copy_render_targets_to_dma_location();
+	m_draw_calls++;
 
 	rsx::thread::end();
 }
@@ -928,6 +948,7 @@
 	if (m_current_present_image == 0xFFFF) return;
 
 	init_buffers();
+	copy_render_targets_to_dma_location();
 
 	float depth_clear = 1.f;
 	u32 stencil_clear = 0;
@@ -1070,15 +1091,19 @@
 
 	if (hard_sync)
 	{
-		m_current_command_buffer->pending = true;
-		m_current_command_buffer->wait();
-
 		//swap handler checks the pending flag, so call it here
 		process_swap_request();
 
+		//wait for the latest instruction to execute
+		m_current_command_buffer->pending = true;
+		m_current_command_buffer->wait();
+
 		//Clear all command buffer statuses
 		for (auto &cb : m_primary_cb_list)
 			cb.poke();
+
+		m_last_flushable_cb = -1;
+		m_flush_commands = false;
 	}
 	else
 	{
@@ -1135,8 +1160,6 @@
 		present.pImageIndices = &m_current_present_image;
 		CHECK_RESULT(m_swap_chain->queuePresentKHR(m_swap_chain->get_present_queue(), &present));
 	}
-	else
-		fmt::throw_exception("How can a process be set without a pending flag?");
 
 	//Clean up all the resources from the last frame
 
@@ -1162,14 +1185,11 @@ void VKGSRender::do_local_task()
 {
 	if (m_flush_commands)
 	{
-		//WARNING: This is a hard sync, expect horrendous performance
-		//Need to process this a little better!
-		//TODO: Link cb with draw buffer and wait for that specific cb based on address
-		LOG_ERROR(RSX, "Hard sync point is to be processed. Performance warning");
-		flush_command_queue(true);
+		//TODO: Determine if a hard sync is necessary
+		//Pipeline barriers later may do a better job synchronizing than wholly stalling the pipeline
+		flush_command_queue();
 
 		m_flush_commands = false;
-		m_flush_draw_buffers = false;
 		while (m_queued_threads);
 	}
 }
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index 8d792599f1..c1efbee718 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -21,6 +21,7 @@ namespace vk
 		//DMA relevant data
 		u16 native_pitch;
 		VkFence dma_fence = VK_NULL_HANDLE;
+		bool synchronized = false;
 		vk::render_device* m_device = nullptr;
 		vk::image *vram_texture = nullptr;
 		std::unique_ptr<vk::buffer> dma_buffer;
@@ -52,6 +53,10 @@
 			//TODO: Properly compute these values
 			this->native_pitch = native_pitch;
 			pitch = cpu_address_range / height;
+
+			//Even if we are managing the same vram section, we cannot guarantee contents are static
+			//The create method is only invoked when a new managed session is required
+			synchronized = false;
 		}
 
 		void release_dma_resources()
@@ -193,6 +198,8 @@
 				CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
 				CHECK_RESULT(vkResetFences(*m_device, 1, &dma_fence));
 			}
+
+			synchronized = true;
 		}
 
 		template <typename T>
@@ -217,7 +224,7 @@
 			if (m_device == nullptr)
 				m_device = &dev;
 
-			if (dma_fence == VK_NULL_HANDLE || dma_buffer.get() == nullptr)
+			if (!synchronized)
 			{
 				LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
 				copy_texture(cmd, heap_index, submit_queue, true, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
@@ -263,6 +270,11 @@
 			dma_buffer->unmap();
 			//Its highly likely that this surface will be reused, so we just leave resources in place
 		}
+
+		bool is_synchronized() const
+		{
+			return synchronized;
+		}
 	};
 
 	class texture_cache
@@ -538,18 +550,22 @@
 			region->copy_texture(cmd, memory_types.host_visible_coherent, submit_queue);
 		}
 
-		bool address_is_flushable(u32 address)
+		std::tuple<bool, bool> address_is_flushable(u32 address)
 		{
+			if (address < texture_cache_range.first ||
+				address > texture_cache_range.second)
+				return std::make_tuple(false, false);
+
 			for (auto &tex : m_cache)
 			{
 				if (tex.is_dirty()) continue;
 				if (!tex.is_flushable()) continue;
 
 				if (tex.overlaps(address))
-					return true;
+					return std::make_tuple(true, tex.is_synchronized());
 			}
 
-			return false;
+			return std::make_tuple(false, false);
 		}
 
 		bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
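
Reviewer note: the heart of this change is the per-section `synchronized` flag.
A cached section counts as synchronized once its GPU-to-host copy has been
submitted behind `dma_fence`; on_access_violation can then simply wait on the
command buffer that carried the copy instead of taking the old hard-sync path
(LOG_ERROR + flush_command_queue(true)) on every fault. The sketch below is a
minimal, self-contained illustration of that fence-guarded readback pattern.
It is not RPCS3 code: `readback_section`, `record_readback` and `wait_readback`
are hypothetical names, and error handling is trimmed to simple result checks.

	#include <vulkan/vulkan.h>
	#include <cstdint>

	// Mirrors cached_texture_section's synchronized flag: submit the copy once,
	// remember that it is in flight, and only block when the data is needed.
	struct readback_section
	{
		VkDevice device       = VK_NULL_HANDLE;
		VkFence  dma_fence    = VK_NULL_HANDLE;
		bool     synchronized = false;

		// Record a sync point ahead of time: submit the GPU->host copy in `cmd`
		// behind a fence so a later flush only has to wait, not stall the queue.
		VkResult record_readback(VkQueue queue, VkCommandBuffer cmd)
		{
			if (dma_fence == VK_NULL_HANDLE)
			{
				VkFenceCreateInfo info = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
				VkResult res = vkCreateFence(device, &info, nullptr, &dma_fence);
				if (res != VK_SUCCESS) return res;
			}

			VkSubmitInfo submit = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
			submit.commandBufferCount = 1;
			submit.pCommandBuffers    = &cmd;

			// dma_fence signals once the copy has executed on the GPU
			VkResult res = vkQueueSubmit(queue, 1, &submit, dma_fence);
			synchronized = (res == VK_SUCCESS);
			return res;
		}

		// Access-violation path: cheap if a sync point exists. Returns false
		// when no copy is in flight, i.e. the caller must stall-and-copy now
		// (the new `else` branch in on_access_violation).
		bool wait_readback()
		{
			if (!synchronized)
				return false;

			vkWaitForFences(device, 1, &dma_fence, VK_TRUE, UINT64_MAX);
			vkResetFences(device, 1, &dma_fence);
			synchronized = false;
			return true;
		}
	};

This two-state answer ("is the region flushable at all" / "was a sync point
already recorded") is exactly what the new std::tuple<bool, bool> return of
address_is_flushable() carries back to on_access_violation().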