diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index b4a7108ff6..cd420ed90d 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -2032,7 +2032,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 			}
 
 			// Obtain range lock as normal store
-			vm::range_lock(nullptr, range_lock, eal, size0);
+			vm::range_lock(range_lock, eal, size0);
 
 			switch (size0)
 			{
@@ -2104,35 +2104,32 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 
 		perf_meter<"DMA_PUT"_u64> perf2;
 
-		// Temporarily disabled, may be removed at all
-		atomic_t<u64>* res = nullptr;
-
 		switch (u32 size = args.size)
 		{
 		case 1:
 		{
-			vm::range_lock(res, range_lock, eal, 1);
+			vm::range_lock(range_lock, eal, 1);
 			*reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
 			range_lock->release(0);
 			break;
 		}
 		case 2:
 		{
-			vm::range_lock(res, range_lock, eal, 2);
+			vm::range_lock(range_lock, eal, 2);
 			*reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
 			range_lock->release(0);
 			break;
 		}
 		case 4:
 		{
-			vm::range_lock(res, range_lock, eal, 4);
+			vm::range_lock(range_lock, eal, 4);
 			*reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
 			range_lock->release(0);
 			break;
 		}
 		case 8:
 		{
-			vm::range_lock(res, range_lock, eal, 8);
+			vm::range_lock(range_lock, eal, 8);
 			*reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
 			range_lock->release(0);
 			break;
@@ -2141,7 +2138,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 		{
 			if (((eal & 127) + size) <= 128)
 			{
-				vm::range_lock(res, range_lock, eal, size);
+				vm::range_lock(range_lock, eal, size);
 
 				while (size)
 				{
@@ -2167,7 +2164,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 			size -= size0;
 
 			// Split locking + transfer in two parts (before 64K border, and after it)
-			vm::range_lock(res, range_lock, range_addr, size0);
+			vm::range_lock(range_lock, range_addr, size0);
 
 			// Avoid unaligned stores in mov_rdata_avx
 			if (reinterpret_cast<u64>(dst) & 0x10)
@@ -2201,7 +2198,7 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 				range_addr = nexta;
 			}
 
-			vm::range_lock(res, range_lock, range_addr, range_end - range_addr);
+			vm::range_lock(range_lock, range_addr, range_end - range_addr);
 
 			// Avoid unaligned stores in mov_rdata_avx
 			if (reinterpret_cast<u64>(dst) & 0x10)
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 7f3cf2d275..2a8ec64ce6 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -152,7 +152,7 @@ namespace vm
 		return &g_range_lock_set[std::countr_one(bits)];
 	}
 
-	void range_lock_internal(atomic_t<u64>* res, atomic_t<u64>* range_lock, u32 begin, u32 size)
+	void range_lock_internal(atomic_t<u64>* range_lock, u32 begin, u32 size)
 	{
 		perf_meter<"RHW_LOCK"_u64> perf0;
 
@@ -168,30 +168,22 @@ namespace vm
 			const u64 lock_val = g_range_lock.load();
 			const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
 			const u32 lock_size = static_cast<u32>(lock_val >> 35);
-			const u64 lock_bits = lock_val & range_mask;
-			const u64 res_val = res ? res->load() & 127 : 0;
 
 			u64 addr = begin;
 
-			// Only useful for range_locked, and is reliable in this case
-			if (g_shareable[begin >> 16])
+			// See range_lock()
+			if (g_shareable[begin >> 16] | (((lock_val >> 32) & (range_full_mask >> 32)) ^ (range_locked >> 32)))
 			{
 				addr = addr & 0xffff;
 			}
 
-			if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+			if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
 			{
 				range_lock->store(begin | (u64{size} << 32));
 
 				const u64 new_lock_val = g_range_lock.load();
-				const u64 new_res_val = res ? res->load() & 127 : 0;
 
-				if (!new_lock_val && !new_res_val) [[likely]]
-				{
-					break;
-				}
-
-				if (new_lock_val == lock_val && !new_res_val) [[likely]]
+				if (!(new_lock_val | (new_lock_val != lock_val))) [[likely]]
 				{
 					break;
 				}
@@ -820,7 +812,7 @@ namespace vm
 		}
 
 		// Protect range locks from actual memory protection changes
-		_lock_shareable_cache(range_deallocation, addr, size);
+		_lock_shareable_cache(range_allocation, addr, size);
 
 		if (shm && shm->flags() != 0 && g_shareable[addr >> 16])
 		{
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 824e84ccc6..bb07f7c190 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -18,15 +18,14 @@ namespace vm
 		range_readable = 1ull << 32,
 		range_writable = 2ull << 32,
 		range_executable = 4ull << 32,
-		range_mask = 7ull << 32,
+		range_full_mask = 7ull << 32,
 
 		/* flag combinations with special meaning */
 
-		range_normal = 3ull << 32, // R+W
+		range_normal = 3ull << 32, // R+W, testing as mask for zero can check no access
 		range_locked = 2ull << 32, // R+W as well, the only range flag that should block by address
-		range_sharing = 4ull << 32, // Range being registered as shared, flags are unchanged
+		range_sharing = 6ull << 32, // Range being registered as shared, flags are unchanged
 		range_allocation = 0, // Allocation, no safe access
-		range_deallocation = 6ull << 32, // Deallocation, no safe access
 	};
 
 	extern atomic_t<u64> g_range_lock;
@@ -39,39 +38,32 @@ namespace vm
 	// Register range lock for further use
 	atomic_t<u64>* alloc_range_lock();
 
-	void range_lock_internal(atomic_t<u64>* res, atomic_t<u64>* range_lock, u32 begin, u32 size);
+	void range_lock_internal(atomic_t<u64>* range_lock, u32 begin, u32 size);
 
 	// Lock memory range
-	FORCE_INLINE void range_lock(atomic_t<u64>* res, atomic_t<u64>* range_lock, u32 begin, u32 size)
+	FORCE_INLINE void range_lock(atomic_t<u64>* range_lock, u32 begin, u32 size)
 	{
 		const u64 lock_val = g_range_lock.load();
 		const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
 		const u32 lock_size = static_cast<u32>(lock_val >> 35);
-		const u64 lock_bits = lock_val & range_mask;
-		const u64 res_val = res ? res->load() & 127 : 0;
 
 		u64 addr = begin;
 
-		// Only used for range_locked and is reliable in this case
-		if (g_shareable[begin >> 16])
+		// Optimization: if range_locked is not used, the addr check will always pass
+		// Otherwise, g_shareable is unchanged and its value is reliable to read
+		if (g_shareable[begin >> 16] | (((lock_val >> 32) & (range_full_mask >> 32)) ^ (range_locked >> 32)))
 		{
 			addr = addr & 0xffff;
 		}
 
-		if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+		if (addr + size <= lock_addr || addr >= lock_addr + lock_size) [[likely]]
 		{
 			// Optimistic locking
 			range_lock->store(begin | (u64{size} << 32));
 
 			const u64 new_lock_val = g_range_lock.load();
-			const u64 new_res_val = res ? res->load() & 127 : 0;
 
-			if (!new_lock_val && !new_res_val) [[likely]]
-			{
-				return;
-			}
-
-			if (new_lock_val == lock_val && !new_res_val) [[likely]]
+			if (!(new_lock_val | (new_lock_val != lock_val))) [[likely]]
 			{
 				return;
 			}
@@ -80,7 +72,7 @@ namespace vm
 		}
 
 		// Fallback to slow path
-		range_lock_internal(res, range_lock, begin, size);
+		range_lock_internal(range_lock, begin, size);
 	}
 
 	// Release it
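
For reference, the sketch below is not part of the patch; it only models, with illustrative names (range_lock_fields, unpack, may_take_fast_path), how the fast path in the new vm::range_lock() reads the packed g_range_lock word. The field positions are taken from the expressions in the diff: the start address of the published range sits in the low 32 bits (static_cast<u32>(lock_val)), three flag bits occupy bits 32-34 (range_full_mask), and the size is stored from bit 35 upward (lock_val >> 35).

// Standalone sketch (not RPCS3 code): decode the g_range_lock word as the patch does.
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

constexpr u64 range_full_mask = 7ull << 32; // three flag bits, as in vm_locking.h
constexpr u64 range_locked    = 2ull << 32; // the only flag that blocks by address

struct range_lock_fields
{
	u64 addr;  // low 32 bits, widened to u64 ("-> u64" in the patch) for overflow-free interval math
	u32 size;  // bits 35 and up
	u64 flags; // bits 32..34
};

constexpr range_lock_fields unpack(u64 lock_val)
{
	return {
		static_cast<u32>(lock_val),
		static_cast<u32>(lock_val >> 35),
		lock_val & range_full_mask,
	};
}

// Simplified fast-path test kept by the patch: proceed only if [addr, addr + size)
// does not intersect the published range. (The 64K-offset collapsing that the diff
// applies to addr just before this test is omitted here.)
constexpr bool may_take_fast_path(u64 lock_val, u64 addr, u32 size)
{
	const range_lock_fields f = unpack(lock_val);
	return addr + size <= f.addr || addr >= f.addr + f.size;
}

static_assert(unpack(range_locked | 0x10000 | (u64{128} << 35)).size == 128);
static_assert(may_take_fast_path(range_locked | 0x20000 | (u64{0x10000} << 35), 0x40000, 128));

Note that the per-thread entry written by range_lock->store(begin | (u64{size} << 32)) in the diff uses a different, flag-free encoding and is not covered by this sketch.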