diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 2b426e05b6..c5d7a544f6 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2490,7 +2490,7 @@ namespace rsx // Invalidate const address_range tex_range = address_range::start_length(attributes.address, tex_size); - invalidate_range_impl_base(cmd, tex_range, invalidation_cause::read, {}, std::forward(extras)...); + invalidate_range_impl_base(cmd, tex_range, invalidation_cause::cause_is_read | invalidation_cause::cause_uses_strict_data_bounds, {}, std::forward(extras)...); // Upload from CPU. Note that sRGB conversion is handled in the FS auto uploaded = upload_image_from_cpu(cmd, tex_range, attributes.width, attributes.height, attributes.depth, tex.get_exact_mipmap_count(), attributes.pitch, attributes.gcm_format, diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h index 79ad1b610e..d01660775e 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -5,6 +5,7 @@ #include "TextureUtils.h" #include "Emu/Memory/vm.h" +#include "Emu/RSX/Host/MM.h" #include "util/vm.hpp" #include @@ -29,8 +30,7 @@ namespace rsx { ensure(range.is_page_range()); - //rsx_log.error("memory_protect(0x%x, 0x%x, %x)", static_cast(range.start), static_cast(range.length()), static_cast(prot)); - utils::memory_protect(vm::base(range.start), range.length(), prot); + rsx::mm_protect(vm::base(range.start), range.length(), prot); #ifdef TEXTURE_CACHE_DEBUG tex_cache_checker.set_protection(range, prot); diff --git a/rpcs3/Emu/RSX/Host/MM.cpp b/rpcs3/Emu/RSX/Host/MM.cpp new file mode 100644 index 0000000000..b8968f1d29 --- /dev/null +++ b/rpcs3/Emu/RSX/Host/MM.cpp @@ -0,0 +1,104 @@ +#include "stdafx.h" +#include "MM.h" +#include +#include + +#include +#include +#include +#include +#include + +namespace rsx +{ + rsx::simple_array g_deferred_mprotect_queue; + shared_mutex g_mprotect_queue_lock; + + void mm_flush_mprotect_queue_internal() + { + for (const auto& block : g_deferred_mprotect_queue) + { + utils::memory_protect(reinterpret_cast(block.start), block.length, block.prot); + } + + g_deferred_mprotect_queue.clear(); + } + + void mm_defer_mprotect_internal(u64 start, u64 length, utils::protection prot) + { + // We could stack and merge requests here, but that is more trouble than it is truly worth. + // A fresh call to memory_protect only takes a few nanoseconds of setup overhead, it is not worth the risk of hanging because of conflicts. + g_deferred_mprotect_queue.push_back({ start, length, prot }); + } + + void mm_protect(void* ptr, u64 length, utils::protection prot) + { + if (!g_cfg.video.async_host_memory_manager) + { + utils::memory_protect(ptr, length, prot); + return; + } + + // Naive merge. Eventually it makes more sense to do conflict resolution, but it's not as important. + const auto start = reinterpret_cast(ptr); + const auto end = start + length; + + std::lock_guard lock(g_mprotect_queue_lock); + + if (prot == utils::protection::rw || prot == utils::protection::wx) + { + // Basically an unlock op. Flush if any overlap is detected + for (const auto& block : g_deferred_mprotect_queue) + { + if (block.overlaps(start, end)) + { + mm_flush_mprotect_queue_internal(); + break; + } + } + + utils::memory_protect(ptr, length, prot); + return; + } + + // No, Ro, etc. + mm_defer_mprotect_internal(start, length, prot); + } + + void mm_flush() + { + std::lock_guard lock(g_mprotect_queue_lock); + mm_flush_mprotect_queue_internal(); + } + + void mm_flush(u32 vm_address) + { + std::lock_guard lock(g_mprotect_queue_lock); + if (g_deferred_mprotect_queue.empty()) + { + return; + } + + const auto addr = reinterpret_cast(vm::base(vm_address)); + for (const auto& block : g_deferred_mprotect_queue) + { + if (block.overlaps(addr)) + { + mm_flush_mprotect_queue_internal(); + return; + } + } + } + + void mm_flush_lazy() + { + if (!g_cfg.video.multithreaded_rsx) + { + mm_flush(); + return; + } + + auto& rsxdma = g_fxo->get(); + rsxdma.backend_ctrl(static_cast(mm_backend_ctrl::mm_flush), nullptr); + } +} diff --git a/rpcs3/Emu/RSX/Host/MM.h b/rpcs3/Emu/RSX/Host/MM.h new file mode 100644 index 0000000000..96e5229095 --- /dev/null +++ b/rpcs3/Emu/RSX/Host/MM.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include + +namespace rsx +{ + struct MM_block + { + u64 start; + u64 length; + utils::protection prot; + + inline bool overlaps(u64 start, u64 end) const + { + // [Start, End] is not a proper closed range, there is an off-by-one by design. + // FIXME: Use address_range64 + const u64 this_end = this->start + this->length; + return (this->start < end && start < this_end); + } + + inline bool overlaps(u64 addr) const + { + // [Start, End] is not a proper closed range, there is an off-by-one by design. + // FIXME: Use address_range64 + const u64 this_end = this->start + this->length; + return (addr >= start && addr < this_end); + } + }; + + enum class mm_backend_ctrl : u32 + { + mm_flush = 0x80000002 + }; + + void mm_protect(void* start, u64 length, utils::protection prot); + void mm_flush_lazy(); + void mm_flush(u32 vm_address); + void mm_flush(); +} diff --git a/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp b/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp index fe6dc21ba5..674c66470d 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv47_sync.hpp @@ -7,6 +7,9 @@ namespace rsx { + void mm_flush_lazy(); + void mm_flush(); + namespace util { template @@ -27,6 +30,7 @@ namespace rsx if constexpr (FlushDMA) { // If the backend handled the request, this call will basically be a NOP + rsx::mm_flush_lazy(); g_fxo->get().sync(); } @@ -34,6 +38,7 @@ namespace rsx { // Manually flush the pipeline. // It is possible to stream report writes using the host GPU, but that generates too much submit traffic. + rsx::mm_flush(); RSX(ctx)->sync(); } diff --git a/rpcs3/Emu/RSX/VK/VKCommandStream.h b/rpcs3/Emu/RSX/VK/VKCommandStream.h index e559a688f4..31dc703593 100644 --- a/rpcs3/Emu/RSX/VK/VKCommandStream.h +++ b/rpcs3/Emu/RSX/VK/VKCommandStream.h @@ -9,7 +9,8 @@ namespace vk enum // callback commands { rctrl_queue_submit = 0x80000000, - rctrl_run_gc = 0x80000001 + rctrl_run_gc = 0x80000001, + rctrl_mem_protect = 0x80000002, }; struct submit_packet diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 371a777e94..c31a1ac515 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -15,6 +15,7 @@ #include "vkutils/scratch.h" #include "Emu/RSX/rsx_methods.h" +#include "Emu/RSX/Host/MM.h" #include "Emu/RSX/Host/RSXDMAWriter.h" #include "Emu/RSX/NV47/HW/context_accessors.define.h" #include "Emu/Memory/vm_locking.h" @@ -1010,6 +1011,8 @@ VKGSRender::~VKGSRender() bool VKGSRender::on_access_violation(u32 address, bool is_writing) { + rsx::mm_flush(address); + vk::texture_cache::thrashed_set result; { const rsx::invalidation_cause cause = is_writing ? rsx::invalidation_cause::deferred_write : rsx::invalidation_cause::deferred_read; @@ -2460,6 +2463,9 @@ void VKGSRender::close_and_submit_command_buffer(vk::fence* pFence, VkSemaphore { ensure(!m_queue_status.test_and_set(flush_queue_state::flushing)); + // Host MM sync before executing anything on the GPU + rsx::mm_flush(); + // Workaround for deadlock occuring during RSX offloader fault // TODO: Restructure command submission infrastructure to avoid this condition const bool sync_success = g_fxo->get().sync(); @@ -2823,6 +2829,11 @@ void VKGSRender::renderctl(u32 request_code, void* args) vk::on_event_completed(eid, true); break; } + case vk::rctrl_mem_protect: + { + rsx::mm_flush(); + break; + } default: fmt::throw_exception("Unhandled request code 0x%x", request_code); } diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 8a0e7737ca..a3a14268f8 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -178,6 +178,7 @@ struct cfg_root : cfg::node cfg::_bool decr_memory_layout{ this, "DECR memory layout", false}; // Force enable increased allowed main memory range as DECR console cfg::_bool host_label_synchronization{ this, "Allow Host GPU Labels", false }; cfg::_bool disable_msl_fast_math{ this, "Disable MSL Fast Math", false }; + cfg::_bool async_host_memory_manager{ this, "Asynchronous Host Memory Manager", true, true }; cfg::_enum output_scaling{ this, "Output Scaling Mode", output_scaling_mode::bilinear, true }; struct node_vk : cfg::node diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 9a196dd8af..f311845462 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -104,6 +104,7 @@ + @@ -621,6 +622,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 584787892a..c516f50756 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -1312,6 +1312,9 @@ Emu\GPU\RSX\Host Mini-Driver + + Emu\GPU\RSX\Host Mini-Driver + @@ -2644,6 +2647,9 @@ Emu\GPU\RSX\Host Mini-Driver + + Emu\GPU\RSX\Host Mini-Driver +