From a49a0f2a86d2e4cdeff6e1d65d2cfdc76e8ed6ad Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 14 Mar 2019 15:27:50 +0300 Subject: [PATCH] vk/gl: Synchronization improvements - Properly wait for the buffer transfer operation to finish before map/readback! - Change vkFence to vkEvent which works more like a GL fence which is what is needed. - Implement supporting methods and functions - Do not destroy fence by immediately waiting after copying to dma buffer --- rpcs3/Emu/RSX/Common/texture_cache.h | 1 + rpcs3/Emu/RSX/GL/GLTextureCache.h | 61 ++++++++-------------------- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 42 +++++-------------- rpcs3/Emu/RSX/VK/VKGSRender.h | 3 +- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 36 ++++++++++++++++ rpcs3/Emu/RSX/VK/VKHelpers.h | 1 + rpcs3/Emu/RSX/VK/VKTextureCache.h | 42 ++++++++++++++----- 7 files changed, 99 insertions(+), 87 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 105536660e..84455c943a 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2670,6 +2670,7 @@ namespace rsx else { verify(HERE), dst_is_render_target; + dst_subres.surface->on_write(); } if (rsx::get_resolution_scale_percent() != 100) diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index fe08587a1c..d42d12fe2b 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -61,7 +61,6 @@ namespace gl texture::format format = texture::format::rgba; texture::type type = texture::type::ubyte; - rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample; u8 get_pixel_size(texture::format fmt_, texture::type type_) { @@ -157,7 +156,7 @@ namespace gl using baseclass::cached_texture_section; void create(u16 w, u16 h, u16 depth, u16 mipmaps, gl::texture* image, u32 rsx_pitch, bool read_only, - gl::texture::format gl_format, gl::texture::type gl_type, bool swap_bytes) + gl::texture::format gl_format = gl::texture::format::rgba, gl::texture::type gl_type = gl::texture::type::ubyte, bool swap_bytes = false) { auto new_texture = static_cast(image); ASSERT(!exists() || !is_managed() || vram_texture == new_texture); @@ -166,11 +165,9 @@ namespace gl if (read_only) { managed_texture.reset(vram_texture); - aa_mode = rsx::surface_antialiasing::center_1_sample; } else { - aa_mode = static_cast(image)->read_aa_mode; ASSERT(managed_texture.get() == nullptr); } @@ -193,28 +190,6 @@ namespace gl baseclass::on_section_resources_created(); } - void create_read_only(gl::viewable_image* image, u32 width, u32 height, u32 depth, u32 mipmaps, u16 pitch) - { - ASSERT(!exists() || !is_managed() || vram_texture == image); - - verify(HERE), pitch; - - //Only to be used for ro memory, we dont care about most members, just dimensions and the vram texture handle - this->width = width; - this->height = height; - this->depth = depth; - this->mipmaps = mipmaps; - - managed_texture.reset(image); - vram_texture = image; - - rsx_pitch = pitch; - real_pitch = 0; - - // Notify baseclass - baseclass::on_section_resources_created(); - } - void set_dimensions(u32 width, u32 height, u32 /*depth*/, u32 pitch) { this->width = width; @@ -264,17 +239,20 @@ namespace gl u32 real_width = width; u32 real_height = height; - switch (aa_mode) + if (context == rsx::texture_upload_context::framebuffer_storage) { - case rsx::surface_antialiasing::center_1_sample: - break; - case rsx::surface_antialiasing::diagonal_centered_2_samples: - real_width *= 2; - break; - default: - real_width *= 2; - real_height *= 2; - break; + switch (static_cast(vram_texture)->read_aa_mode) + { + case rsx::surface_antialiasing::center_1_sample: + break; + case rsx::surface_antialiasing::diagonal_centered_2_samples: + real_width *= 2; + break; + default: + real_width *= 2; + real_height *= 2; + break; + } } areai src_area = { 0, 0, 0, 0 }; @@ -376,16 +354,13 @@ namespace gl verify(HERE), cmd.drv; copy_texture(cmd, blocking); - - if (blocking) - { - m_fence.wait_for_signal(); - } } void* map_synchronized(u32 offset, u32 size) { - AUDIT(synchronized); + AUDIT(synchronized && !m_fence.is_empty()); + + m_fence.wait_for_signal(); verify(HERE), (offset + size) <= pbo_size; glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); @@ -894,7 +869,7 @@ namespace gl cached.set_image_type(type); cached.set_gcm_format(gcm_format); - cached.create_read_only(image, width, height, depth, mipmaps, pitch); + cached.create(width, height, depth, mipmaps, image, pitch, true); cached.set_dirty(false); if (context != rsx::texture_upload_context::blit_engine_dst) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 0712b06a7e..28da25ca56 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -871,10 +871,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) const bool is_rsxthr = std::this_thread::get_id() == m_rsx_thread; bool has_queue_ref = false; - u64 sync_timestamp = 0ull; - for (const auto& tex : result.sections_to_flush) - sync_timestamp = std::max(sync_timestamp, tex->get_sync_timestamp()); - if (!is_rsxthr) { //Always submit primary cb to ensure state consistency (flush pending changes such as image transitions) @@ -882,7 +878,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) std::lock_guard lock(m_flush_queue_mutex); - m_flush_requests.post(sync_timestamp == 0ull); + m_flush_requests.post(false); has_queue_ref = true; } else if (!vk::is_uninterruptible()) @@ -895,33 +891,6 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) //LOG_ERROR(RSX, "Fault in uninterruptible code!"); } - if (sync_timestamp > 0) - { - // Wait for earliest cb submitted after the sync timestamp to finish - command_buffer_chunk *target_cb = nullptr; - for (auto &cb : m_primary_cb_list) - { - if (cb.last_sync >= sync_timestamp) - { - if (!cb.pending) - { - target_cb = nullptr; - break; - } - - if (target_cb == nullptr || target_cb->last_sync > cb.last_sync) - { - target_cb = &cb; - } - } - } - - if (target_cb) - { - target_cb->wait(GENERAL_WAIT_TIMEOUT); - } - } - if (has_queue_ref) { //Wait for the RSX thread to process request if it hasn't already @@ -3520,9 +3489,18 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst //Verify enough memory exists before attempting to handle data transfer check_heap_status(); + const auto old_speculations_count = m_texture_cache.get_num_cache_speculative_writes(); if (m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer)) { m_samplers_dirty.store(true); + m_current_command_buffer->flags |= cb_has_blit_transfer; + + if (m_texture_cache.get_num_cache_speculative_writes() > old_speculations_count) + { + // A speculative write happened, flush while the dma resource is valid + // TODO: Deeper investigation as to why this can trigger problems + flush_command_queue(); + } return true; } diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 53eec5bbe8..d40a99256e 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -50,7 +50,8 @@ extern u64 get_system_time(); enum command_buffer_data_flag { - cb_has_occlusion_task = 1 + cb_has_occlusion_task = 1, + cb_has_blit_transfer = 2 }; struct command_buffer_chunk: public vk::command_buffer diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 799dc006c0..e1e1f19a0b 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -654,6 +654,42 @@ namespace vk } } + VkResult wait_for_event(VkEvent event, u64 timeout) + { + u64 t = 0; + while (true) + { + switch (const auto status = vkGetEventStatus(*g_current_renderer, event)) + { + case VK_EVENT_SET: + return VK_SUCCESS; + case VK_EVENT_RESET: + break; + default: + die_with_error(HERE, status); + return status; + } + + if (timeout) + { + if (!t) + { + t = get_system_time(); + continue; + } + + if ((get_system_time() - t) > timeout) + { + LOG_ERROR(RSX, "[vulkan] vk::wait_for_event has timed out!"); + return VK_TIMEOUT; + } + } + + //std::this_thread::yield(); + _mm_pause(); + } + } + void die_with_error(const char* faulting_addr, VkResult error_code) { std::string error_message; diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 72b6afaa8a..7995c12dc6 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -181,6 +181,7 @@ namespace vk // Fence reset with driver workarounds in place void reset_fence(VkFence *pFence); VkResult wait_for_fence(VkFence pFence, u64 timeout = 0ull); + VkResult wait_for_event(VkEvent pEvent, u64 timeout = 0ull); void die_with_error(const char* faulting_addr, VkResult error_code); diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index f8bcfea621..5ca67d65f5 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -36,7 +36,7 @@ namespace vk std::unique_ptr managed_texture = nullptr; //DMA relevant data - VkFence dma_fence = VK_NULL_HANDLE; + VkEvent dma_fence = VK_NULL_HANDLE; vk::render_device* m_device = nullptr; vk::viewable_image *vram_texture = nullptr; std::unique_ptr dma_buffer; @@ -82,9 +82,9 @@ namespace vk { dma_buffer.reset(); - if (dma_fence != nullptr) + if (dma_fence != VK_NULL_HANDLE) { - vkDestroyFence(*m_device, dma_fence, nullptr); + vkDestroyEvent(*m_device, dma_fence, nullptr); dma_fence = VK_NULL_HANDLE; } } @@ -164,9 +164,9 @@ namespace vk if (dma_fence == VK_NULL_HANDLE) { - VkFenceCreateInfo createInfo = {}; - createInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - vkCreateFence(*m_device, &createInfo, nullptr, &dma_fence); + VkEventCreateInfo createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; + vkCreateEvent(*m_device, &createInfo, nullptr, &dma_fence); } if (dma_buffer.get() == nullptr) @@ -297,16 +297,32 @@ namespace vk if (manage_cb_lifetime) { - cmd.end(); - cmd.submit(submit_queue, {}, dma_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + VkFence submit_fence; + VkFenceCreateInfo create_info{}; + create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + vkCreateFence(*m_device, &create_info, nullptr, &submit_fence); - //Now we need to restart the command-buffer to restore it to the way it was before... - vk::wait_for_fence(dma_fence); - vk::reset_fence(&dma_fence); + cmd.end(); + cmd.submit(submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + // Now we need to restart the command-buffer to restore it to the way it was before... + vk::wait_for_fence(submit_fence); CHECK_RESULT(vkResetCommandBuffer(cmd, 0)); + // Cleanup + vkDestroyFence(*m_device, submit_fence, nullptr); + vkSetEvent(*m_device, dma_fence); if (cmd.access_hint != vk::command_buffer::access_type_hint::all) + { + // If this is a primary CB, restart it cmd.begin(); + } + } + else + { + // Only used when doing speculation + verify(HERE), vkGetEventStatus(*m_device, dma_fence) == VK_EVENT_RESET; + vkCmdSetEvent(cmd, dma_fence, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT); } synchronized = true; @@ -333,6 +349,10 @@ namespace vk { AUDIT(synchronized); + // Synchronize, reset dma_fence after waiting + vk::wait_for_event(dma_fence, GENERAL_WAIT_TIMEOUT); + vkResetEvent(*m_device, dma_fence); + return dma_buffer->map(offset, size); }