diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 84bd80b4c6..05e7faa3b2 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -2599,10 +2599,14 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args)
 	if (raddr)
 	{
 		// Last check for event before we clear the reservation
-		if (raddr == addr || rtime != vm::reservation_acquire(raddr, 128) || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr)))
+		if (raddr == addr)
 		{
 			set_events(SPU_EVENT_LR);
 		}
+		else
+		{
+			get_events(SPU_EVENT_LR);
+		}
 	}
 
 	if (!vm::check_addr(addr, 1, vm::page_writable))
@@ -2974,7 +2978,7 @@ bool spu_thread::process_mfc_cmd()
 		if (raddr && raddr != addr)
 		{
 			// Last check for event before we replace the reservation with a new one
-			if (vm::reservation_acquire(raddr, 128) != rtime || !cmp_rdata(temp, vm::_ref<decltype(rdata)>(raddr)))
+			if (reservation_check(raddr, temp))
 			{
 				set_events(SPU_EVENT_LR);
 			}
@@ -3146,6 +3150,28 @@ bool spu_thread::process_mfc_cmd()
 		ch_mfc_cmd.cmd, ch_mfc_cmd.lsa, ch_mfc_cmd.eal, ch_mfc_cmd.tag, ch_mfc_cmd.size);
 }
 
+bool spu_thread::reservation_check(u32 addr, const decltype(rdata)& data)
+{
+	if (!addr)
+	{
+		// No reservation to be lost in the first place
+		return false;
+	}
+
+	if ((vm::reservation_acquire(addr, 128) & -128) != rtime)
+	{
+		return true;
+	}
+
+	// Ensure data is allocated (HACK: would raise LR event if not)
+	vm::range_lock<false>(range_lock, addr, 128);
+
+	const bool res = *range_lock && cmp_rdata(data, vm::_ref<decltype(rdata)>(addr));
+
+	range_lock->release(0);
+	return !res;
+}
+
 spu_thread::ch_events_t spu_thread::get_events(u32 mask_hint, bool waiting, bool reading)
 {
 	if (auto mask1 = ch_events.load().mask; mask1 & ~SPU_EVENT_IMPLEMENTED)
@@ -3157,10 +3183,13 @@ retry:
 	u32 collect = 0;
 
 	// Check reservation status and set SPU_EVENT_LR if lost
-	if (mask_hint & SPU_EVENT_LR && raddr && ((vm::reservation_acquire(raddr, sizeof(rdata)) & -128) != rtime || !cmp_rdata(rdata, vm::_ref<decltype(rdata)>(raddr))))
+	if (mask_hint & SPU_EVENT_LR)
 	{
-		collect |= SPU_EVENT_LR;
-		raddr = 0;
+		if (reservation_check(raddr, rdata))
+		{
+			collect |= SPU_EVENT_LR;
+			raddr = 0;
+		}
 	}
 
 	// SPU Decrementer Event on underflow (use the upper 32-bits to determine it)
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index 8a1d53520c..a5fae796cd 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -807,6 +807,10 @@ public:
 		return thread_type;
 	}
 
+	// Returns true if reservation existed but was just discovered to be lost
+	// It is safe to use on any address, even if not directly accessed by SPU (so it's slower)
+	bool reservation_check(u32 addr, const decltype(rdata)& data);
+
 	bool read_reg(const u32 addr, u32& value);
 	bool write_reg(const u32 addr, const u32 value);
 
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 96c5ee845f..25babbf44c 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -188,7 +188,7 @@ namespace vm
 					break;
 				}
 
-				range_lock->store(0);
+				range_lock->release(0);
 			}
 
 			std::shared_lock lock(g_mutex, std::try_to_lock);
@@ -289,7 +289,35 @@ namespace vm
 
 		// Block or signal new range locks
 		g_range_lock = addr | u64{size} << 35 | flags;
 
+		const auto range = utils::address_range::start_length(addr, size);
+		while (true)
+		{
+			const u64 bads = for_all_range_locks([&](u32 addr2, u32 size2)
+			{
+				// TODO (currently not possible): handle 2 64K pages (inverse range), or more pages
+				if (g_shareable[addr2 >> 16])
+				{
+					addr2 &= 0xffff;
+				}
+
+				ASSUME(size2);
+
+				if (range.overlaps(utils::address_range::start_length(addr2, size2))) [[unlikely]]
+				{
+					return 1;
+				}
+
+				return 0;
+			});
+
+			if (!bads) [[likely]]
+			{
+				break;
+			}
+
+			_mm_pause();
+		}
 	}
 
 	void passive_lock(cpu_thread& cpu)
diff --git a/rpcs3/Emu/Memory/vm_locking.h b/rpcs3/Emu/Memory/vm_locking.h
index 9d7865f0b2..d0d1e7ab5a 100644
--- a/rpcs3/Emu/Memory/vm_locking.h
+++ b/rpcs3/Emu/Memory/vm_locking.h
@@ -40,6 +40,7 @@ namespace vm
 	void range_lock_internal(atomic_t<u64>* range_lock, u32 begin, u32 size);
 
 	// Lock memory range
+	template <bool TouchMem = true>
 	FORCE_INLINE void range_lock(atomic_t<u64>* range_lock, u32 begin, u32 size)
 	{
 		const u64 lock_val = g_range_lock.load();
@@ -59,7 +60,7 @@ namespace vm
 			addr = addr & 0xffff;
 		}
 
-		if (addr + size <= lock_addr || addr >= lock_addr + lock_size || ((lock_val >> 32) ^ (range_locked >> 32)) & (range_full_mask >> 32)) [[likely]]
+		if (addr + size <= lock_addr || addr >= lock_addr + lock_size || (TouchMem && ((lock_val >> 32) ^ (range_locked >> 32)) & (range_full_mask >> 32))) [[likely]]
 		{
 			// Optimistic locking.
 			// Note that we store the range we will be accessing, without any clamping.
@@ -69,10 +70,30 @@ namespace vm
 
 			if (!new_lock_val || new_lock_val == lock_val) [[likely]]
 			{
+				if constexpr (!TouchMem)
+				{
+					if (!vm::check_addr(begin, size, vm::page_writable))
+					{
+						range_lock->release(0);
+					}
+				}
+
 				return;
 			}
 
-			range_lock->store(0);
+			range_lock->release(0);
+
+			if constexpr (!TouchMem)
+			{
+				return;
+			}
+		}
+
+		if constexpr (!TouchMem)
+		{
+			// Give up
+			range_lock->release(0);
+			return;
 		}
 
 		// Fallback to slow path
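
Note for reviewers, not part of the patch: the snippet below is a minimal standalone model of the optimistic locking pattern that the new probe path (TouchMem = false) builds on: publish the requested range into a per-thread slot, re-read the global blocker word, and either keep the lock or retract it, giving up instead of waiting when only probing. All names here (g_blocker, acquire_range, Probe, slot) are illustrative and are not rpcs3 identifiers; the real range_lock also checks flag bits and falls back to range_lock_internal, as shown in the hunks above.

// Standalone model of an optimistic range lock with a probe-only mode.
// Not rpcs3 code; all names are illustrative.
#include <atomic>
#include <cstdint>
#include <cstdio>

// Global "blocker" word: low 32 bits = start address, high 32 bits = size (0 = no blocker).
static std::atomic<std::uint64_t> g_blocker{0};

// Try to publish [begin, begin + size) in 'slot'.
// Probe = true mimics TouchMem = false: never wait, just report success or failure.
template <bool Probe = false>
bool acquire_range(std::atomic<std::uint64_t>& slot, std::uint32_t begin, std::uint32_t size)
{
	const std::uint64_t blocker = g_blocker.load();
	const std::uint32_t b_addr = static_cast<std::uint32_t>(blocker);
	const std::uint32_t b_size = static_cast<std::uint32_t>(blocker >> 32);

	if (!blocker || begin + size <= b_addr || begin >= b_addr + b_size)
	{
		// Optimistic publication, then re-check the blocker for a race.
		slot.store(std::uint64_t{begin} | (std::uint64_t{size} << 32));

		if (g_blocker.load() == blocker)
		{
			return true; // held; caller releases with slot.store(0)
		}

		slot.store(0); // a blocker appeared in between: retract
	}

	if constexpr (Probe)
	{
		return false; // probe mode gives up instead of waiting
	}

	// A real implementation would fall back to a slow path here (cf. range_lock_internal).
	return false;
}

int main()
{
	std::atomic<std::uint64_t> slot{0};

	std::printf("no blocker: %d\n", acquire_range<true>(slot, 0x10000, 128));
	slot.store(0);

	g_blocker.store(std::uint64_t{0x10000} | (std::uint64_t{0x10000} << 32));
	std::printf("overlapping blocker: %d\n", acquire_range<true>(slot, 0x10000, 128));
}

In the patch itself, reservation_check uses the probe form (vm::range_lock<false>) only to verify that the reserved line is still mapped and its data unchanged, releasing the slot immediately afterwards rather than holding it across any memory access.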