From 5260f4b47d4b57ad0dc0f98b5905ef38c1055e1a Mon Sep 17 00:00:00 2001
From: kd-11
Date: Sat, 16 Mar 2019 12:14:11 +0300
Subject: [PATCH] rsx: Improvements to memory flush mechanism

- Batch dma transfers whenever possible and do them in one go
- vk: Always ensure that queued dma transfers are visible to the GPU before they are needed by the host
  Requires a little refactoring to allow proper communication of the commandbuffer state
- vk: Code cleanup, the simplified mechanism makes it so that it's not necessary to pass tons of args to methods
- vk: Fixup - do not forcefully do dma transfers on sections in an invalidation zone! They may have been speculated correctly already
---
 rpcs3/Emu/RSX/Common/texture_cache.h       |  42 +++++--
 rpcs3/Emu/RSX/Common/texture_cache_utils.h |  37 +++---
 rpcs3/Emu/RSX/GL/GLRenderTargets.cpp       |   4 +-
 rpcs3/Emu/RSX/GL/GLTextureCache.h          |  25 ++--
 rpcs3/Emu/RSX/VK/VKGSRender.cpp            |  53 +++++----
 rpcs3/Emu/RSX/VK/VKGSRender.h              |  11 --
 rpcs3/Emu/RSX/VK/VKHelpers.h               |  20 ++++
 rpcs3/Emu/RSX/VK/VKTextureCache.h          | 126 +++++++++++----------
 8 files changed, 178 insertions(+), 140 deletions(-)

diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h
index 054cf4ca8a..e1c79a9483 100644
--- a/rpcs3/Emu/RSX/Common/texture_cache.h
+++ b/rpcs3/Emu/RSX/Common/texture_cache.h
@@ -364,6 +364,8 @@ namespace rsx
 		virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy, const texture_channel_remap_t& remap_vector) = 0;
 		virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
 		virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0;
+		virtual void prepare_for_dma_transfers(commandbuffer_type&) = 0;
+		virtual void cleanup_after_dma_transfers(commandbuffer_type&) = 0;

 	public:
 		virtual void destroy() = 0;
@@ -397,13 +399,13 @@ namespace rsx
 		template <typename ...Args>
 		void err_once(const char* fmt, const Args&... params)
 		{
-			logs::RSX.error(fmt, params...);
+			emit_once(true, fmt, params...);
 		}

 		template <typename ...Args>
 		void warn_once(const char* fmt, const Args&... params)
 		{
-			logs::RSX.warning(fmt, params...);
+			emit_once(false, fmt, params...);
 		}

 		/**
@@ -458,19 +460,40 @@ namespace rsx
 				});
 			}

+			rsx::simple_array<section_storage_type*> sections_to_transfer;
 			for (auto &surface : data.sections_to_flush)
 			{
-				if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
+				if (!surface->is_synchronized())
+				{
+					sections_to_transfer.push_back(surface);
+				}
+				else if (surface->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
 				{
 					// This region is set to always read from itself (unavoidable hard sync)
 					const auto ROP_timestamp = rsx::get_current_renderer()->ROP_sync_timestamp;
-					if (surface->is_synchronized() && ROP_timestamp > surface->get_sync_timestamp())
+					if (ROP_timestamp > surface->get_sync_timestamp())
 					{
-						surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
+						sections_to_transfer.push_back(surface);
 					}
 				}
+			}

-				surface->flush(cmd, std::forward<Args>(extras)...);
+			if (!sections_to_transfer.empty())
+			{
+				// Batch all hard faults together
+				prepare_for_dma_transfers(cmd);
+
+				for (auto &surface : sections_to_transfer)
+				{
+					surface->copy_texture(cmd, true, std::forward<Args>(extras)...);
+				}
+
+				cleanup_after_dma_transfers(cmd);
+			}
+
+			for (auto &surface : data.sections_to_flush)
+			{
+				surface->flush();

 				// Exclude this region when flushing other sections that should not trample it
 				// If we overlap an excluded RO, set it as dirty
@@ -1224,7 +1247,7 @@ namespace rsx
 		}

 		template <typename ...FlushArgs, typename ...Args>
-		void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, std::tuple<FlushArgs...>&& flush_extras, Args&&... extras)
+		void lock_memory_region(commandbuffer_type& cmd, image_storage_type* image, const address_range &rsx_range, u32 width, u32 height, u32 pitch, Args&&... extras)
 		{
 			AUDIT(g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer); // this method is only called when either WCB or WDB are enabled
@@ -1244,10 +1267,7 @@ namespace rsx
 			if (!region.is_locked() || region.get_context() != texture_upload_context::framebuffer_storage)
 			{
 				// Invalidate sections from surface cache occupying same address range
-				std::apply(&texture_cache::invalidate_range_impl_base, std::tuple_cat(
-					std::forward_as_tuple(this, cmd, rsx_range, invalidation_cause::superseded_by_fbo),
-					std::forward<std::tuple<FlushArgs...>>(flush_extras)
-				));
+				invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::superseded_by_fbo);
 			}

 			if (!region.is_locked() || region.can_be_reused())
diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h
index a045d1780a..4e755cd7f6 100644
--- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h
+++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h
@@ -1303,14 +1303,10 @@ namespace rsx
 			return get_context() != texture_upload_context::shader_read && get_memory_read_flags() != memory_read_flags::flush_always;
 		}

-		void on_flush(bool miss)
+		void on_flush()
 		{
 			speculatively_flushed = false;

-			if (miss)
-			{
-				m_tex_cache->on_miss(*derived());
-			}
 			m_tex_cache->on_flush();

 			if (tracked_by_predictor())
@@ -1328,6 +1324,12 @@ namespace rsx
 			m_tex_cache->on_speculative_flush();
 		}

+		void on_miss()
+		{
+			LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
+			m_tex_cache->on_miss(*derived());
+		}
+
 		void touch(u64 tag)
 		{
 			last_write_tag = tag;
@@ -1454,11 +1456,9 @@ namespace rsx

 	public:
 		// Returns false if there was a cache miss
-		template <typename ...Args>
-		bool flush(Args&&... extras)
+		void flush()
 		{
-			if (flushed) return true;
-			bool miss = false;
+			if (flushed) return;

 			// Sanity checks
 			ASSERT(exists());
@@ -1469,19 +1469,12 @@ namespace rsx
 			{
 				flushed = true;
 				flush_exclusions.clear();
-				on_flush(miss);
-				return !miss;
+				on_flush();
+				return;
 			}

-			// If we are not synchronized, we must synchronize before proceeding (hard fault)
-			if (!synchronized)
-			{
-				LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", get_section_base());
-				derived()->synchronize(true, std::forward<Args>(extras)...);
-				miss = true;
-
-				ASSERT(synchronized); // TODO ruipin: This might be possible in OGL. Revisit
-			}
+			// NOTE: Hard faults should have been pre-processed beforehand
+			ASSERT(synchronized);

 			// Copy flush result to guest memory
 			imp_flush();
@@ -1491,9 +1484,7 @@ namespace rsx
 			flushed = true;
 			derived()->finish_flush();
 			flush_exclusions.clear();
-			on_flush(miss);
-
-			return !miss;
+			on_flush();
 		}

 		void add_flush_exclusion(const address_range& rng)
diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
index d04dee4471..c057185de7 100644
--- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
+++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp
@@ -392,7 +392,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 		{
 			// Mark buffer regions as NO_ACCESS on Cell-visible side
 			m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
-				std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
+				color_format.format, color_format.type, color_format.swap_bytes);
 		}
 		else
 		{
@@ -407,7 +407,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
 		{
 			const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
 			m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
-				std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
+				depth_format_gl.format, depth_format_gl.type, true);
 		}
 		else
 		{
diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h
index d42d12fe2b..2348d608b7 100644
--- a/rpcs3/Emu/RSX/GL/GLTextureCache.h
+++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h
@@ -217,14 +217,18 @@ namespace gl
 			}
 		}

-		void copy_texture(gl::command_context& cmd, bool manage_lifetime)
+		void copy_texture(gl::command_context& cmd, bool miss)
 		{
 			ASSERT(exists());

-			if (!manage_lifetime)
+			if (LIKELY(!miss))
 			{
 				baseclass::on_speculative_flush();
 			}
+			else
+			{
+				baseclass::on_miss();
+			}

 			if (context == rsx::texture_upload_context::framebuffer_storage)
 			{
@@ -347,15 +351,6 @@ namespace gl
 		/**
 		 * Flush
 		 */
-		void synchronize(bool blocking, gl::command_context& cmd)
-		{
-			if (synchronized)
-				return;
-
-			verify(HERE), cmd.drv;
-			copy_texture(cmd, blocking);
-		}
-
 		void* map_synchronized(u32 offset, u32 size)
 		{
 			AUDIT(synchronized && !m_fence.is_empty());
@@ -642,7 +637,7 @@ namespace gl
 				if (src)
 				{
 					//Format mismatch
-					err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
+					warn_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, (GLenum)ifmt);
 				}

 				//Apply base component map onto the new texture if a data cast has been done
@@ -992,6 +987,12 @@ namespace gl
 			}
 		}

+		void prepare_for_dma_transfers(gl::command_context&) override
+		{}
+
+		void cleanup_after_dma_transfers(gl::command_context&) override
+		{}
+
 	public:

 		using baseclass::texture_cache;
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index 7c3f7a73b5..0e8052fb60 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -855,7 +855,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 		std::lock_guard lock(m_secondary_cb_guard);

 		const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read;
-		result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause, m_swapchain->get_graphics_queue()));
+		result = std::move(m_texture_cache.invalidate_address(m_secondary_command_buffer, address, cause));
 	}

 	if (!result.violation_handled)
@@ -897,7 +897,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
 			m_flush_requests.producer_wait();
 		}

-		m_texture_cache.flush_all(m_secondary_command_buffer, result, m_swapchain->get_graphics_queue());
+		m_texture_cache.flush_all(m_secondary_command_buffer, result);

 		if (has_queue_ref)
 		{
@@ -913,7 +913,7 @@ void VKGSRender::on_invalidate_memory_range(const utils::address_range &range)
 	{
 		std::lock_guard lock(m_secondary_cb_guard);
-		auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap, m_swapchain->get_graphics_queue()));
+		auto data = std::move(m_texture_cache.invalidate_range(m_secondary_command_buffer, range, rsx::invalidation_cause::unmap));
 		AUDIT(data.empty());

 		if (data.violation_handled)
@@ -1454,7 +1454,7 @@ void VKGSRender::end()

 			if (rsx::method_registers.fragment_textures[i].enabled())
 			{
-				*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);
+				*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts);

 				const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
 				const VkBool32 compare_enabled = (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8 ||
@@ -1526,7 +1526,7 @@ void VKGSRender::end()

 			if (rsx::method_registers.vertex_textures[i].enabled())
 			{
-				*sampler_state = m_texture_cache._upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);
+				*sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts);

 				bool replace = !vs_sampler_handles[i];
 				const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN);
@@ -1725,7 +1725,7 @@ void VKGSRender::end()
 		m_occlusion_map[m_active_query_info->driver_handle].indices.push_back(occlusion_id);
 		m_occlusion_map[m_active_query_info->driver_handle].command_buffer_to_wait = m_current_command_buffer;

-		m_current_command_buffer->flags |= cb_has_occlusion_task;
+		m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task;
 	}

 	// Apply write memory barriers
@@ -1796,7 +1796,6 @@ void VKGSRender::end()
 		m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
 	}

-	m_current_command_buffer->num_draws++;
 	m_rtts.on_write();

 	rsx::thread::end();
@@ -2187,7 +2186,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint)
 {
 	if (hint == rsx::FIFO_hint::hint_conditional_render_eval)
 	{
-		if (m_current_command_buffer->flags & cb_has_occlusion_task)
+		if (m_current_command_buffer->flags & vk::command_buffer::cb_has_occlusion_task)
 		{
 			// Occlusion test result evaluation is coming up, avoid a hard sync
 			if (!m_flush_requests.pending())
@@ -2881,7 +2880,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 				const utils::address_range rsx_range = m_surface_info[i].get_memory_range();
 				m_texture_cache.set_memory_read_flags(rsx_range, rsx::memory_read_flags::flush_once);
-				m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range, m_swapchain->get_graphics_queue());
+				m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, rsx_range);
 			}

 			m_surface_info[i].address = m_surface_info[i].pitch = 0;
@@ -2898,7 +2897,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 			auto old_format = vk::get_compatible_depth_surface_format(m_device->get_formats_support(), m_depth_surface_info.depth_format);
 			const utils::address_range surface_range = m_depth_surface_info.get_memory_range();
 			m_texture_cache.set_memory_read_flags(surface_range, rsx::memory_read_flags::flush_once);
-			m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+			m_texture_cache.flush_if_cache_miss_likely(*m_current_command_buffer, surface_range);
 		}

 		m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
@@ -2944,6 +2943,12 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 		m_texture_cache.notify_surface_changed(layout.zeta_address);
 	}

+	// Before messing with memory properties, flush command queue if there are dma transfers queued up
+	if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
+	{
+		flush_command_queue();
+	}
+
 	const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format);
 	for (u8 index : m_draw_buffers)
 	{
@@ -2953,11 +2958,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 		if (g_cfg.video.write_color_buffers)
 		{
 			m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
-				m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
+				m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], color_fmt_info.first, color_fmt_info.second);
 		}
 		else
 		{
-			m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+			m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
 		}
 	}

@@ -2968,11 +2973,11 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
 		{
 			const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH24_D8 : CELL_GCM_TEXTURE_DEPTH16;
 			m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
-				m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple{ m_swapchain->get_graphics_queue() }, gcm_format, false);
+				m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, gcm_format, false);
 		}
 		else
 		{
-			m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
+			m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range);
 		}
 	}

@@ -3323,21 +3328,22 @@ void VKGSRender::flip(int buffer)
 			const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
 			const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
 			const auto overlap = m_texture_cache.find_texture_from_range(range, 0, lookup_mask);
-			bool flush_queue = false;

 			for (const auto & section : overlap)
 			{
-				section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
-				flush_queue = true;
+				if (!section->is_synchronized())
+				{
+					section->copy_texture(*m_current_command_buffer, true);
+				}
 			}

-			if (flush_queue)
+			if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
 			{
 				// Submit for processing to lower hard fault penalty
 				flush_command_queue();
 			}

-			m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read, m_swapchain->get_graphics_queue());
+			m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read);
 			image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
 		}
 	}
@@ -3487,16 +3493,15 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst
 	//Verify enough memory exists before attempting to handle data transfer
 	check_heap_status();

-	const auto old_speculations_count = m_texture_cache.get_num_cache_speculative_writes();
 	if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer))
 	{
 		m_samplers_dirty.store(true);
-		m_current_command_buffer->flags |= cb_has_blit_transfer;
+		m_current_command_buffer->set_flag(vk::command_buffer::cb_has_blit_transfer);

-		if (m_texture_cache.get_num_cache_speculative_writes() > old_speculations_count)
+		if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
 		{
-			// A speculative write happened, flush while the dma resource is valid
-			// TODO: Deeper investigation as to why this can trigger problems
+			// A dma transfer has been queued onto this cb
+			// This likely means that we're done with the transfers to the target (writes_likely_completed=1)
 			flush_command_queue();
 		}
 		return true;
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h
index d40a99256e..049926d865 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.h
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.h
@@ -48,20 +48,11 @@ namespace vk

 extern u64 get_system_time();

-enum command_buffer_data_flag
-{
-	cb_has_occlusion_task = 1,
-	cb_has_blit_transfer = 2
-};
-
 struct command_buffer_chunk: public vk::command_buffer
 {
 	VkFence submit_fence = VK_NULL_HANDLE;
 	VkDevice m_device = VK_NULL_HANDLE;

-	u32 num_draws = 0;
-	u32 flags = 0;
-
 	std::atomic_bool pending = { false };
 	std::atomic<u64> last_sync = { 0 };
 	shared_mutex guard_mutex;
@@ -100,8 +91,6 @@ struct command_buffer_chunk: public vk::command_buffer
 			wait(FRAME_PRESENT_TIMEOUT);

 		CHECK_RESULT(vkResetCommandBuffer(commands, 0));
-		num_draws = 0;
-		flags = 0;
 	}

 	bool poke()
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index 7995c12dc6..1cde91ca8c 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -1168,6 +1168,14 @@ namespace vk
 		}
 		access_hint = flush_only;

+		enum command_buffer_data_flag : u32
+		{
+			cb_has_occlusion_task = 1,
+			cb_has_blit_transfer = 2,
+			cb_has_dma_transfer = 4
+		};
+		u32 flags = 0;
+
 	public:
 		command_buffer() {}
 		~command_buffer() {}
@@ -1206,6 +1214,16 @@ namespace vk
 			return *pool;
 		}

+		void clear_flags()
+		{
+			flags = 0;
+		}
+
+		void set_flag(command_buffer_data_flag flag)
+		{
+			flags |= flag;
+		}
+
 		operator VkCommandBuffer() const
 		{
 			return commands;
@@ -1278,6 +1296,8 @@ namespace vk
 			acquire_global_submit_lock();
 			CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
 			release_global_submit_lock();
+
+			clear_flags();
 		}
 	};
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index 5ca67d65f5..778646ea1b 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -66,11 +66,21 @@ namespace vk
 				managed_texture.reset(vram_texture);
 			}

-			//Even if we are managing the same vram section, we cannot guarantee contents are static
-			//The create method is only invoked when a new managed session is required
-			synchronized = false;
-			flushed = false;
-			sync_timestamp = 0ull;
+			if (synchronized)
+			{
+				// Even if we are managing the same vram section, we cannot guarantee contents are static
+				// The create method is only invoked when a new managed session is required
+				if (!flushed)
+				{
+					// Reset fence
+					verify(HERE), m_device, dma_buffer, dma_fence != VK_NULL_HANDLE;
+					vkResetEvent(*m_device, dma_fence);
+				}
+
+				synchronized = false;
+				flushed = false;
+				sync_timestamp = 0ull;
+			}

 			// Notify baseclass
 			baseclass::on_section_resources_created();
@@ -148,14 +158,18 @@ namespace vk
 			return flushed;
 		}

-		void copy_texture(vk::command_buffer& cmd, bool manage_cb_lifetime, VkQueue submit_queue)
+		void copy_texture(vk::command_buffer& cmd, bool miss)
 		{
 			ASSERT(exists());

-			if (!manage_cb_lifetime)
+			if (LIKELY(!miss))
 			{
 				baseclass::on_speculative_flush();
 			}
+			else
+			{
+				baseclass::on_miss();
+			}

 			if (m_device == nullptr)
 			{
@@ -175,11 +189,6 @@ namespace vk
 				dma_buffer.reset(new vk::buffer(*m_device, align(get_section_size(), 256), memory_type, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
 			}

-			if (manage_cb_lifetime)
-			{
-				cmd.begin();
-			}
-
 			if (context == rsx::texture_upload_context::framebuffer_storage)
 			{
 				auto as_rtt = static_cast<vk::render_target*>(vram_texture);
@@ -295,36 +304,20 @@ namespace vk
 				vkCmdCopyBuffer(cmd, mem_target->value, dma_buffer->value, 1, &copy);
 			}

-			if (manage_cb_lifetime)
+			if (LIKELY(!miss))
 			{
-				VkFence submit_fence;
-				VkFenceCreateInfo create_info{};
-				create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-				vkCreateFence(*m_device, &create_info, nullptr, &submit_fence);
-
-				cmd.end();
-				cmd.submit(submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-				// Now we need to restart the command-buffer to restore it to the way it was before...
-				vk::wait_for_fence(submit_fence);
-				CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
-
-				// Cleanup
-				vkDestroyFence(*m_device, submit_fence, nullptr);
-				vkSetEvent(*m_device, dma_fence);
-
-				if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
-				{
-					// If this is a primary CB, restart it
-					cmd.begin();
-				}
+				// If this is speculated, it should only occur once
+				verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
 			}
 			else
 			{
-				// Only used when doing speculation
-				verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET;
-				vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
+				// This is the only acceptable situation where a sync can occur twice, due to flush_always being set
+				vkResetEvent(*m_device, dma_fence);
 			}

+			cmd.set_flag(vk::command_buffer::cb_has_dma_transfer);
+			vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
+
 			synchronized = true;
 			sync_timestamp = get_system_time();
 		}
@@ -332,19 +325,6 @@ namespace vk
 		/**
 		 * Flush
 		 */
-		void synchronize(bool blocking, vk::command_buffer& cmd, VkQueue submit_queue)
-		{
-			if (synchronized)
-				return;
-
-			if (m_device == nullptr)
-			{
-				m_device = &cmd.get_command_pool().get_owner();
-			}
-
-			copy_texture(cmd, blocking, submit_queue);
-		}
-
 		void* map_synchronized(u32 offset, u32 size)
 		{
 			AUDIT(synchronized);
@@ -1104,6 +1084,44 @@ namespace vk
 			}
 		}

+		void prepare_for_dma_transfers(vk::command_buffer& cmd) override
+		{
+			if (!cmd.is_recording())
+			{
+				cmd.begin();
+			}
+		}
+
+		void cleanup_after_dma_transfers(vk::command_buffer& cmd) override
+		{
+			// End recording
+			cmd.end();
+
+			if (cmd.access_hint != vk::command_buffer::access_type_hint::all)
+			{
+				// Primary access command queue, must restart it after
+				VkFence submit_fence;
+				VkFenceCreateInfo info{};
+				info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+				vkCreateFence(*m_device, &info, nullptr, &submit_fence);
+
+				cmd.submit(m_submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+
+				vk::wait_for_fence(submit_fence, GENERAL_WAIT_TIMEOUT);
+				vkDestroyFence(*m_device, submit_fence, nullptr);
+
+				CHECK_RESULT(vkResetCommandBuffer(cmd, 0));
+				cmd.begin();
+			}
+			else
+			{
+				// Auxiliary command queue with auto-restart capability
+				cmd.submit(m_submit_queue, {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+			}
+
+			verify(HERE), cmd.flags == 0;
+		}
+
 	public:
 		using baseclass::texture_cache;
@@ -1181,12 +1199,6 @@ namespace vk
 			baseclass::on_frame_end();
 		}

-		template <typename RsxTextureType>
-		sampled_image_descriptor _upload_texture(vk::command_buffer& cmd, RsxTextureType& tex, rsx::vk_render_targets& m_rtts)
-		{
-			return upload_texture(cmd, tex, m_rtts, const_cast<VkQueue>(m_submit_queue));
-		}
-
 		vk::image *upload_image_simple(vk::command_buffer& cmd, u32 address, u32 width, u32 height)
 		{
 			if (!m_formats_support.bgra8_linear)
@@ -1243,13 +1255,13 @@ namespace vk
 		bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd)
 		{
 			blitter helper;
-			auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, const_cast<VkQueue>(m_submit_queue));
+			auto reply = upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper);

 			if (reply.succeeded)
 			{
 				if (reply.real_dst_size)
 				{
-					flush_if_cache_miss_likely(cmd, reply.to_address_range(), m_submit_queue);
+					flush_if_cache_miss_likely(cmd, reply.to_address_range());
 				}

 				return true;
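
The heart of the patch is the rewritten flush loop in texture_cache.h above: instead of synchronizing each faulting section on its own (previously a full submit-and-wait per section inside copy_texture), every section that still needs a GPU-to-host copy is collected first, and all of the copies are recorded inside a single prepare_for_dma_transfers()/cleanup_after_dma_transfers() bracket. Below is a minimal sketch of that control flow; Section, Cmd and TextureCache are simplified stand-ins for section_storage_type, the backend command buffer and rsx::texture_cache, not the real templates:

    // Sketch only: mirrors the three passes of the new flush_all().
    #include <vector>

    struct Cmd {};

    struct Section
    {
        bool synchronized = false;              // GPU->host copy already recorded?
        bool flush_always = false;              // rsx::memory_read_flags::flush_always
        unsigned long long sync_timestamp = 0;

        void copy_texture(Cmd&, bool /*miss*/) { synchronized = true; }
        void flush() { /* imp_flush(): write the dma buffer back to guest memory */ }
    };

    struct TextureCache
    {
        unsigned long long ROP_sync_timestamp = 0;

        void prepare_for_dma_transfers(Cmd&) { /* vk: begin the cb if needed */ }
        void cleanup_after_dma_transfers(Cmd&) { /* vk: end + submit the cb once */ }

        void flush_all(Cmd& cmd, std::vector<Section*>& sections_to_flush)
        {
            // Pass 1: gather every section that still needs a GPU->host copy
            std::vector<Section*> sections_to_transfer;
            for (auto* surface : sections_to_flush)
            {
                if (!surface->synchronized)
                    sections_to_transfer.push_back(surface);   // hard fault
                else if (surface->flush_always && ROP_sync_timestamp > surface->sync_timestamp)
                    sections_to_transfer.push_back(surface);   // stale flush_always data
            }

            // Pass 2: record all copies inside one bracket so the backend
            // pays for at most one submit-and-wait instead of one per section
            if (!sections_to_transfer.empty())
            {
                prepare_for_dma_transfers(cmd);
                for (auto* surface : sections_to_transfer)
                    surface->copy_texture(cmd, true);
                cleanup_after_dma_transfers(cmd);
            }

            // Pass 3: every section is now synchronized; write back to guest memory
            for (auto* surface : sections_to_flush)
                surface->flush();
        }
    };

    int main()
    {
        Cmd cmd;
        Section a, b;
        b.synchronized = true;

        std::vector<Section*> dirty{ &a, &b };
        TextureCache cache;
        cache.flush_all(cmd, dirty);
    }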
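The flags field moved into vk::command_buffer acts as a set of dirty bits: cb_has_dma_transfer is set when a transfer is recorded (cmd.set_flag in copy_texture) and cleared when the buffer is submitted, so call sites such as prepare_rtts() and flip() can cheaply decide whether a flush_command_queue() is still owed. A compact sketch of that bookkeeping, with the actual vkQueueSubmit call elided:

    #include <cstdint>

    struct command_buffer
    {
        enum command_buffer_data_flag : uint32_t
        {
            cb_has_occlusion_task = 1,
            cb_has_blit_transfer  = 2,
            cb_has_dma_transfer   = 4
        };
        uint32_t flags = 0;

        void set_flag(command_buffer_data_flag f) { flags |= f; }
        void clear_flags() { flags = 0; }

        // submit() clears the flags, matching the vkQueueSubmit path in VKHelpers.h
        void submit() { /* vkQueueSubmit(...) */ clear_flags(); }
    };

    int main()
    {
        command_buffer cb;
        cb.set_flag(command_buffer::cb_has_dma_transfer);

        if (cb.flags & command_buffer::cb_has_dma_transfer)
            cb.submit();                    // flags are reset with the submit

        return cb.flags == 0 ? 0 : 1;       // cleanup_after_dma_transfers verifies this
    }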
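On the Vulkan side, each section records its buffer copy and then signals its VkEvent (the member the patch calls dma_fence) from the GPU timeline, while cleanup_after_dma_transfers() ends and submits the batched command buffer once, waiting on a fence when the primary command buffer is in use. The sketch below mirrors that sequence with raw Vulkan calls; the helper names record_dma_copy and flush_primary_cb are illustrative, not rpcs3 API:

    #include <vulkan/vulkan.h>

    // The copy is recorded, then the event is set on the GPU timeline so the
    // host can later poll vkGetEventStatus() to learn when the dma buffer
    // actually contains the section's data.
    void record_dma_copy(VkCommandBuffer cmd, VkBuffer src, VkBuffer dst,
                         VkDeviceSize size, VkEvent dma_fence)
    {
        VkBufferCopy copy{ 0, 0, size };
        vkCmdCopyBuffer(cmd, src, dst, 1, &copy);
        vkCmdSetEvent(cmd, dma_fence,
            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
    }

    // After all copies are batched, the command buffer is submitted exactly once.
    // The fenced wait guarantees the queued transfers are complete before the
    // host reads the dma buffers, which is the hard sync the patch amortizes.
    void flush_primary_cb(VkDevice dev, VkQueue queue, VkCommandBuffer cmd)
    {
        vkEndCommandBuffer(cmd);

        VkFenceCreateInfo info{ VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
        VkFence submit_fence = VK_NULL_HANDLE;
        vkCreateFence(dev, &info, nullptr, &submit_fence);

        VkSubmitInfo submit{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
        submit.commandBufferCount = 1;
        submit.pCommandBuffers = &cmd;
        vkQueueSubmit(queue, 1, &submit, submit_fence);

        vkWaitForFences(dev, 1, &submit_fence, VK_TRUE, UINT64_MAX);
        vkDestroyFence(dev, submit_fence, nullptr);

        // Primary cb: reset and reopen so the renderer can keep recording,
        // as cleanup_after_dma_transfers() does for the non-auxiliary path
        vkResetCommandBuffer(cmd, 0);
        VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
        vkBeginCommandBuffer(cmd, &begin);
    }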