From 78c986b5dd1e9175f225d4f430dfc75813c8bb03 Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Sat, 31 Oct 2020 19:17:38 +0300
Subject: [PATCH] Improve vm::range_lock

Not sure how it ever worked.
Clear redundant vm::clear_range_locks usage.
---
 rpcs3/Emu/Cell/PPUThread.cpp  |  3 ---
 rpcs3/Emu/Cell/SPUThread.cpp  |  7 ++-----
 rpcs3/Emu/Memory/vm.cpp       | 37 +++++++++++++++++++++++++++++++----
 rpcs3/Emu/Memory/vm_locking.h | 19 ++++++++++++------
 4 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 0569ff44a1..fcfefecc10 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -1760,9 +1760,6 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value)
 	// Align address: we do not need the lower 7 bits anymore
 	addr &= -128;
 
-	// Wait for range locks to clear
-	vm::clear_range_locks(addr, 128);
-
 	// Cache line data
 	auto& cline_data = vm::_ref<spu_rdata_t>(addr);
 
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index b1d110f844..828e33f591 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -2038,7 +2038,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls)
 			}
 
 			// Obtain range lock as normal store
-			vm::range_lock(res, range_lock, eal, size0);
+			vm::range_lock(nullptr, range_lock, eal, size0);
 
 			switch (size0)
 			{
@@ -2111,7 +2111,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8* ls)
 		perf_meter<"DMA_PUT"_u64> perf2;
 
 		// TODO: split range-locked stores in cache lines for consistency
-		auto& res = vm::reservation_acquire(eal, args.size);
+		auto res = &vm::reservation_acquire(eal, args.size);
 
 		switch (u32 size = args.size)
 		{
@@ -2574,9 +2574,6 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 				return false;
 			}
 
-			// Wait for range locks to clear
-			vm::clear_range_locks(addr, 128);
-
 			vm::_ref<atomic_t<u8>>(addr) += 0;
 
 			auto& super_data = *vm::get_super_ptr<spu_rdata_t>(addr);
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 6fdd29072b..da6c6d4d86 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -152,12 +152,44 @@ namespace vm
 		return &g_range_lock_set[std::countr_one(bits)];
 	}
 
-	void range_lock_internal(atomic_t<u64>* range_lock, u32 begin, u32 size)
+	void range_lock_internal(atomic_t<u64>* res, atomic_t<u64>* range_lock, u32 begin, u32 size)
 	{
 		perf_meter<"RHW_LOCK"_u64> perf0;
 
 		while (true)
 		{
+			const u64 lock_val = g_range_lock.load();
+			const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
+			const u32 lock_size = static_cast<u32>(lock_val >> 35);
+			const u64 res_val = res ? res->load() & 127 : 0;
+
+			u64 addr = begin;
+
+			if (g_shareable[begin >> 16])
+			{
+				addr = addr & 0xffff;
+			}
+
+			if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+			{
+				range_lock->store(begin | (u64{size} << 32));
+
+				const u64 new_lock_val = g_range_lock.load();
+				const u64 new_res_val = res ? res->load() & 127 : 0;
+
+				if (!new_lock_val && !new_res_val) [[likely]]
+				{
+					return;
+				}
+
+				if (new_lock_val == lock_val && !new_res_val) [[likely]]
+				{
+					return;
+				}
+
+				range_lock->release(0);
+			}
+
 			std::shared_lock lock(g_mutex);
 
 			u32 test = 0;
@@ -180,9 +212,6 @@ namespace vm
 				vm::_ref<atomic_t<u8>>(test) += 0;
 				continue;
 			}
-
-			range_lock->release(begin | u64{size} << 32);
-			return;
 		}
 	}
 
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 0d6bf71610..d086d72e74 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -38,14 +38,15 @@ namespace vm
 	// Register range lock for further use
 	atomic_t<u64>* alloc_range_lock();
 
-	void range_lock_internal(atomic_t<u64>* range_lock, u32 begin, u32 size);
+	void range_lock_internal(atomic_t<u64>* res, atomic_t<u64>* range_lock, u32 begin, u32 size);
 
 	// Lock memory range
-	FORCE_INLINE void range_lock(atomic_t<u64>& res, atomic_t<u64>* range_lock, u32 begin, u32 size)
+	FORCE_INLINE void range_lock(atomic_t<u64>* res, atomic_t<u64>* range_lock, u32 begin, u32 size)
 	{
 		const u64 lock_val = g_range_lock.load();
 		const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
 		const u32 lock_size = static_cast<u32>(lock_val >> 35);
+		const u64 res_val = res ? res->load() & 127 : 0;
 
 		u64 addr = begin;
 
@@ -54,14 +55,20 @@ namespace vm
 			addr = addr & 0xffff;
 		}
 
-		if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !(res.load() & 127)) [[likely]]
+		if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
 		{
 			// Optimistic locking
-			range_lock->release(begin | (u64{size} << 32));
+			range_lock->store(begin | (u64{size} << 32));
 
 			const u64 new_lock_val = g_range_lock.load();
+			const u64 new_res_val = res ? res->load() & 127 : 0;
 
-			if ((!new_lock_val || new_lock_val == lock_val) && !(res.load() & 127)) [[likely]]
+			if (!new_lock_val && !new_res_val) [[likely]]
+			{
+				return;
+			}
+
+			if (new_lock_val == lock_val && !new_res_val) [[likely]]
 			{
 				return;
 			}
@@ -70,7 +77,7 @@ namespace vm
 		}
 
 		// Fallback to slow path
-		range_lock_internal(range_lock, begin, size);
+		range_lock_internal(res, range_lock, begin, size);
 	}
 
 	// Wait for all range locks to release in specified range
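
For reference, the fast path this patch adds to vm::range_lock is an optimistic publish-then-revalidate protocol: check that the requested range does not overlap the globally advertised exclusive range (g_range_lock) and that the optional reservation is not mid-update, publish the range into the caller's own lock slot, then re-read both words and retract if a conflict appeared, falling back to range_lock_internal. The sketch below restates that protocol in isolation. It is a minimal illustration under simplified assumptions: plain std::atomic<u64> words stand in for the engine's types, the g_shareable address folding is omitted, and the names g_range_lock_word, try_publish_range_lock and slot are hypothetical, not RPCS3 identifiers. The bit packing (address in the low 32 bits, size from bit 35, low 7 reservation bits meaning "busy") follows the patch.

// Minimal standalone sketch of the optimistic range-lock fast path above.
// All names here are illustrative stand-ins, not RPCS3's actual API.
#include <atomic>
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Global word advertising an exclusive range: low 32 bits = address, bits 35+ = size
// (the same packing the patch reads with static_cast<u32>(lock_val) and lock_val >> 35).
std::atomic<u64> g_range_lock_word{0};

// Try to publish [begin, begin + size) in the caller's own slot without conflicting
// with the exclusive range or with an in-progress reservation (low 7 bits non-zero).
// Returns true if the range lock is now held; the caller releases it with slot->store(0).
bool try_publish_range_lock(std::atomic<u64>* res, std::atomic<u64>* slot, u32 begin, u32 size)
{
	const u64 lock_val  = g_range_lock_word.load();
	const u64 lock_addr = static_cast<u32>(lock_val);       // address part
	const u32 lock_size = static_cast<u32>(lock_val >> 35); // size part
	const u64 res_val   = res ? res->load() & 127 : 0;
	const u64 addr      = begin;

	// Pre-check: no overlap with the exclusive range, no busy reservation.
	if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val)
	{
		// Publish our range so the exclusive side can observe it...
		slot->store(begin | (u64{size} << 32));

		// ...then re-validate both words after publishing.
		const u64 new_lock_val = g_range_lock_word.load();
		const u64 new_res_val  = res ? res->load() & 127 : 0;

		// Same acceptance condition as the patch, written as one expression:
		// the global word is still clear or unchanged, and no reservation appeared.
		if ((!new_lock_val || new_lock_val == lock_val) && !new_res_val)
		{
			return true;
		}

		// A conflict appeared after publishing: retract and let the caller retry or fall back.
		slot->store(0);
	}

	return false;
}

On failure, the real code retries inside range_lock_internal under a shared g_mutex lock, probing not-yet-readable pages with a dummy write before looping; that is why the retract step above corresponds to range_lock->release(0) followed by the slow-path call in the diff.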