From d0ebba6c5e46bdd0b1fcb1a94b48757a3fa1ee23 Mon Sep 17 00:00:00 2001 From: Elad Ashkenazi <18193363+elad335@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:30:34 +0300 Subject: [PATCH] CELL: New reservation notifications mechanism --- rpcs3/Emu/CPU/CPUThread.cpp | 2 +- rpcs3/Emu/Cell/PPUThread.cpp | 12 ++-- rpcs3/Emu/Cell/SPUThread.cpp | 72 +++++++++++------------ rpcs3/Emu/Cell/lv2/lv2.cpp | 8 +-- rpcs3/Emu/Cell/lv2/sys_spu.cpp | 4 +- rpcs3/Emu/Memory/vm.cpp | 2 +- rpcs3/Emu/Memory/vm_reservation.h | 98 ++++++++++++++++++++++++++++++- 7 files changed, 143 insertions(+), 55 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 5d6e24db1e..8900119abf 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -1075,7 +1075,7 @@ cpu_thread& cpu_thread::operator=(thread_state) { if (u32 resv = atomic_storage::load(thread->raddr)) { - vm::reservation_notifier(resv).notify_all(); + vm::reservation_notifier_notify(resv); } } } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index f7546ee148..b23c95bae1 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -3302,8 +3302,6 @@ const auto ppu_stcx_accurate_tx = build_function_asm& get_resrv_waiters_count(u32 raddr); - template static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) { @@ -3553,11 +3551,11 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) if (ppu.res_notify_time == (vm::reservation_acquire(notify) & -128)) { ppu.state += cpu_flag::wait; - vm::reservation_notifier(notify).notify_all(); + vm::reservation_notifier_notify(notify); notified = true; } - if (get_resrv_waiters_count(addr)) + if (vm::reservation_notifier_count(addr)) { if (!notified) { @@ -3566,7 +3564,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) } else if ((addr ^ notify) & -128) { - res.notify_all(); + vm::reservation_notifier_notify(addr); ppu.res_notify = 
0; } } @@ -3581,7 +3579,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) { // Try to postpone notification to when PPU is asleep or join notifications on the same address // This also optimizes a mutex - won't notify after lock is aqcuired (prolonging the critical section duration), only notifies on unlock - if (get_resrv_waiters_count(addr)) + if (vm::reservation_notifier_count(addr)) { ppu.res_notify = addr; ppu.res_notify_time = rtime + 128; @@ -3607,7 +3605,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) if (ppu.res_notify_time == (vm::reservation_acquire(notify) & -128)) { ppu.state += cpu_flag::wait; - vm::reservation_notifier(notify).notify_all(); + vm::reservation_notifier_notify(notify); static_cast(ppu.test_stopped()); } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index a71f5161fd..816689bc6c 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -444,12 +444,9 @@ waitpkg_func static void __tpause(u32 cycles, u32 cstate) } #endif -static std::array, 128> g_resrv_waiters_count; - -extern atomic_t& get_resrv_waiters_count(u32 raddr) +namespace vm { - // Storage efficient method to distinguish different nearby addresses (which are likely) - return g_resrv_waiters_count[std::popcount(raddr & -512) + ((raddr / 128) % 4) * 32]; + std::array, 512> g_resrv_waiters_count{}; } void do_cell_atomic_128_store(u32 addr, const void* to_write); @@ -3822,7 +3819,7 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) { if (raddr) { - vm::reservation_notifier(addr).notify_all(); + vm::reservation_notifier_notify(addr); raddr = 0; } @@ -4012,7 +4009,7 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args) } do_cell_atomic_128_store(addr, _ptr(args.lsa & 0x3ff80)); - vm::reservation_notifier(addr).notify_all(); + vm::reservation_notifier_notify(addr); } bool spu_thread::do_mfc(bool can_escape, bool must_finish) @@ -4625,12 +4622,11 @@ bool 
spu_thread::process_mfc_cmd() // Spinning, might as well yield cpu resources state += cpu_flag::wait; - // Storage efficient method to distinguish different nearby addresses (which are likely) - get_resrv_waiters_count(addr)++; - - vm::reservation_notifier(addr).wait(this_time, atomic_wait_timeout{100'000}); - - get_resrv_waiters_count(addr)--; + if (auto wait_var = vm::reservation_notifier_begin_wait(addr, rtime)) + { + utils::bless>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{100'000}); + vm::reservation_notifier_end_wait(*wait_var); + } static_cast(test_stopped()); @@ -4673,10 +4669,10 @@ bool spu_thread::process_mfc_cmd() g_unchanged++; // Try to forcefully change timestamp in order to notify threads - if (get_resrv_waiters_count(addr) && res.compare_and_swap_test(new_time, new_time + 128)) + if (vm::reservation_notifier_count(addr) && res.compare_and_swap_test(new_time, new_time + 128)) { rtime = this_time - 128; - vm::reservation_notifier(addr).notify_all(); + vm::reservation_notifier_notify(addr); } } else @@ -4694,9 +4690,9 @@ bool spu_thread::process_mfc_cmd() if (this_time == rtime) { // Try to forcefully change timestamp in order to notify threads - if (get_resrv_waiters_count(addr) && res.compare_and_swap_test(this_time, this_time + 128)) + if (vm::reservation_notifier_count(addr) && res.compare_and_swap_test(this_time, this_time + 128)) { - vm::reservation_notifier(addr).notify_all(); + vm::reservation_notifier_notify(addr); } } @@ -5503,9 +5499,9 @@ s64 spu_thread::get_ch_value(u32 ch) else if (!cmp_rdata(rdata, *resrv_mem)) { // Only data changed, try to notify waiters - if (get_resrv_waiters_count(raddr) && vm::reservation_acquire(raddr).compare_and_swap_test(rtime, rtime + 128)) + if (vm::reservation_notifier_count(raddr) && vm::reservation_acquire(raddr).compare_and_swap_test(rtime, rtime + 128)) { - vm::reservation_notifier(raddr).notify_all(); + vm::reservation_notifier_notify(raddr); } set_lr = true; @@ -5547,21 +5543,21 @@ s64 
spu_thread::get_ch_value(u32 ch) { // Wait with extended timeout, in this situation we have notifications for nearly all writes making it possible // Abort notifications are handled specially for performance reasons - get_resrv_waiters_count(raddr)++; - vm::reservation_notifier(raddr).wait(rtime, atomic_wait_timeout{300'000}); - get_resrv_waiters_count(raddr)--; + if (auto wait_var = vm::reservation_notifier_begin_wait(raddr, rtime)) + { + utils::bless>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{300'000}); + vm::reservation_notifier_end_wait(*wait_var); + } + continue; } const u32 _raddr = this->raddr; #ifdef __linux__ - get_resrv_waiters_count(_raddr)++; - vm::reservation_notifier(_raddr).wait(rtime, atomic_wait_timeout{50'000}); - get_resrv_waiters_count(_raddr)--; - - if (get_resrv_waiters_count(_raddr) && vm::reservation_acquire(_raddr) == rtime + 128) + if (auto wait_var = vm::reservation_notifier_begin_wait(_raddr, rtime)) { - vm::reservation_notifier(_raddr).notify_all(); + utils::bless>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{50'000}); + vm::reservation_notifier_end_wait(*wait_var); } #else static thread_local bool s_tls_try_notify = false; @@ -5600,7 +5596,7 @@ s64 spu_thread::get_ch_value(u32 ch) else if (!cmp_rdata(_this->rdata, *_this->resrv_mem)) { // Only data changed, try to notify waiters - if (get_resrv_waiters_count(raddr) >= 2 && vm::reservation_acquire(raddr).compare_and_swap_test(_this->rtime, _this->rtime + 128)) + if (vm::reservation_notifier_count(raddr) >= 2 && vm::reservation_acquire(raddr).compare_and_swap_test(_this->rtime, _this->rtime + 128)) { s_tls_try_notify = true; } @@ -5618,13 +5614,15 @@ s64 spu_thread::get_ch_value(u32 ch) return true; }); - get_resrv_waiters_count(_raddr)++; - vm::reservation_notifier(_raddr).wait(rtime, atomic_wait_timeout{80'000}); - get_resrv_waiters_count(_raddr)--; - - if (s_tls_try_notify && get_resrv_waiters_count(_raddr) && vm::reservation_acquire(_raddr) == rtime + 128) 
+ if (auto wait_var = vm::reservation_notifier_begin_wait(_raddr, rtime)) { - vm::reservation_notifier(_raddr).notify_all(); + utils::bless>(&wait_var->raw().wait_flag)->wait(1, atomic_wait_timeout{80'000}); + vm::reservation_notifier_end_wait(*wait_var); + } + + if (s_tls_try_notify && vm::reservation_notifier_count(_raddr) && vm::reservation_acquire(_raddr) == rtime + 128) + { + vm::reservation_notifier_notify(_raddr); } #endif } @@ -6548,7 +6546,7 @@ bool spu_thread::stop_and_signal(u32 code) if (prev_resv && prev_resv != resv) { // Batch reservation notifications if possible - vm::reservation_notifier(prev_resv).notify_all(); + vm::reservation_notifier_notify(prev_resv); } prev_resv = resv; @@ -6559,7 +6557,7 @@ bool spu_thread::stop_and_signal(u32 code) if (prev_resv) { - vm::reservation_notifier(prev_resv).notify_all(); + vm::reservation_notifier_notify(prev_resv); } check_state(); diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index 5cf5727342..e05c87c39a 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1340,7 +1340,7 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout) else if (usz notify_later_idx = std::basic_string_view{g_to_notify, std::size(g_to_notify)}.find_first_of(std::add_pointer_t{}); notify_later_idx != umax) { - g_to_notify[notify_later_idx] = &vm::reservation_notifier(addr); + g_to_notify[notify_later_idx] = vm::reservation_notifier_notify(addr, true); if (notify_later_idx < std::size(g_to_notify) - 1) { @@ -1350,7 +1350,7 @@ bool lv2_obj::sleep(cpu_thread& cpu, const u64 timeout) } else { - vm::reservation_notifier(addr).notify_all(); + vm::reservation_notifier_notify(addr); } } } @@ -1394,7 +1394,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio) else if (usz notify_later_idx = std::basic_string_view{g_to_notify, std::size(g_to_notify)}.find_first_of(std::add_pointer_t{}); notify_later_idx != umax) { - g_to_notify[notify_later_idx] = &vm::reservation_notifier(addr); + 
g_to_notify[notify_later_idx] = vm::reservation_notifier_notify(addr, true); if (notify_later_idx < std::size(g_to_notify) - 1) { @@ -1404,7 +1404,7 @@ bool lv2_obj::awake(cpu_thread* thread, s32 prio) } else { - vm::reservation_notifier(addr).notify_all(); + vm::reservation_notifier_notify(addr); } } } diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp index b8f71d85cd..07a3e9f0ef 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp @@ -1399,7 +1399,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread& ppu, u32 id, s32 value) if (prev_resv && prev_resv != resv) { // Batch reservation notifications if possible - vm::reservation_notifier(prev_resv).notify_all(); + vm::reservation_notifier_notify(prev_resv); } prev_resv = resv; @@ -1409,7 +1409,7 @@ error_code sys_spu_thread_group_terminate(ppu_thread& ppu, u32 id, s32 value) if (prev_resv) { - vm::reservation_notifier(prev_resv).notify_all(); + vm::reservation_notifier_notify(prev_resv); } group->exit_status = value; diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index db9db94db3..a0407645f8 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -116,7 +116,7 @@ namespace vm { if (ok) { - reservation_notifier(addr).notify_all(); + reservation_notifier_notify(addr); } if (cpu && !had_wait && cpu->test_stopped()) diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h index 6330a08a4d..6231e61a06 100644 --- a/rpcs3/Emu/Memory/vm_reservation.h +++ b/rpcs3/Emu/Memory/vm_reservation.h @@ -36,10 +36,102 @@ namespace vm // Update reservation status void reservation_update(u32 addr); - // Get reservation sync variable - inline atomic_t& reservation_notifier(u32 addr) + struct reservation_waiter_t { - return *reinterpret_cast*>(g_reservations + (addr & 0xff80) / 2); + u32 wait_flag = 0; + u8 waiters_count = 0; + u8 waiters_index = 0; + }; + + static inline std::pair*, atomic_t*> reservation_notifier(u32 raddr) 
+ { + extern std::array<atomic_t<reservation_waiter_t>, 512> g_resrv_waiters_count; + + // Storage efficient method to distinguish different nearby addresses (which are likely) + const usz index = std::popcount(raddr & -512) + ((raddr / 128) % 4) * 32; + auto& waiter = g_resrv_waiters_count[index * 4]; + return { &g_resrv_waiters_count[index * 4 + waiter.load().waiters_index % 4], &waiter }; + } + + static inline u32 reservation_notifier_count(u32 raddr) + { + return reservation_notifier(raddr).first->load().waiters_count; + } + + static inline void reservation_notifier_end_wait(atomic_t<reservation_waiter_t>& waiter) + { + waiter.atomic_op([](reservation_waiter_t& value) + { + if (value.waiters_count-- == 1) + { + value.wait_flag = 0; + } + }); + } + + static inline atomic_t<reservation_waiter_t>* reservation_notifier_begin_wait(u32 raddr, u64 rtime) + { + atomic_t<reservation_waiter_t>& waiter = *reservation_notifier(raddr).first; + + waiter.atomic_op([](reservation_waiter_t& value) + { + value.wait_flag = 1; + value.waiters_count++; + }); + + if ((reservation_acquire(raddr) & -128) != rtime) + { + reservation_notifier_end_wait(waiter); + return nullptr; + } + + return &waiter; + } + + static inline atomic_t<u32>* reservation_notifier_notify(u32 raddr, bool pospone = false) + { + const auto notifiers = reservation_notifier(raddr); + + if (notifiers.first->load().wait_flag) + { + if (notifiers.first == notifiers.second) + { + if (!notifiers.first->fetch_op([](reservation_waiter_t& value) + { + if (value.waiters_index == 0) + { + value.wait_flag = 0; + value.waiters_count = 0; + value.waiters_index++; + return true; + } + + return false; + }).second) + { + return nullptr; + } + } + else + { + u8 old_index = static_cast<u8>(notifiers.first - notifiers.second); + if (!atomic_storage<u8>::compare_exchange(notifiers.second->raw().waiters_index, old_index, (old_index + 1) % 4)) + { + return nullptr; + } + + notifiers.first->release(reservation_waiter_t{}); + } + + if (pospone) + { + return utils::bless<atomic_t<u32>>(&notifiers.first->raw().wait_flag); + } + +
utils::bless<atomic_t<u32>>(&notifiers.first->raw().wait_flag)->notify_all(); + } + + return nullptr; } u64 reservation_lock_internal(u32, atomic_t<u64>&);