From 5b973448bf75a239f34fd9c7698c8a02b04e2ea9 Mon Sep 17 00:00:00 2001 From: Elad Ashkenazi <18193363+elad335@users.noreply.github.com> Date: Sat, 15 Jun 2024 07:29:56 +0300 Subject: [PATCH] SPU: Better GETLLAR spin detection --- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 10 ++++-- rpcs3/Emu/Cell/SPUThread.cpp | 50 +++++++++++++++++++--------- rpcs3/Emu/Cell/SPUThread.h | 4 +-- 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 5020fd8924..b18ef1b122 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -2309,6 +2309,12 @@ public: for (u32 i = 0; i <= s_reg_127; i++) { + if (i == s_reg_sp) + { + // If we postpone R1 store we lose effortless meta-analytical capabilities for little gain + continue; + } + // If store isn't erased, try to sink it if (auto& bs = bqbi->store[i]; bs && bqbi->bb->targets.size() > 1 && !bqbi->does_gpr_barrier_proceed_last_store(i)) { @@ -2447,7 +2453,7 @@ public: } } - spu_log.trace("Postoned r%u store from block 0x%x (single)", i, block_q[bi].first); + spu_log.trace("Postponed r%u store from block 0x%x (single)", i, block_q[bi].first); } else { @@ -2488,7 +2494,7 @@ public: pdt.recalculate(*m_function); dt.recalculate(*m_function); - spu_log.trace("Postoned r%u store from block 0x%x (multiple)", i, block_q[bi].first); + spu_log.trace("Postponed r%u store from block 0x%x (multiple)", i, block_q[bi].first); } ins = edge->getTerminator(); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index e58965145d..85b74b88f5 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -444,10 +444,12 @@ waitpkg_func static void __tpause(u32 cycles, u32 cstate) } #endif +static std::array, 128> g_resrv_waiters_count; + static inline atomic_t& get_resrv_waiters_count(u32 raddr) { // Storage efficient method to distinguish different nearby addresses (which are likely) - return spu_thread::g_reservation_waiters[std::popcount(raddr) + ((raddr / 128) % 4) * 32]; + return g_resrv_waiters_count[std::popcount(raddr) + ((raddr / 128) % 4) * 32]; } void do_cell_atomic_128_store(u32 addr, const void* to_write); @@ -1794,9 +1796,9 @@ void spu_thread::cpu_task() const auto type = cpu->get_type(); - if (g_cfg.core.spu_prof) + if (u64 hash = cpu->block_hash) { - return fmt::format("%sSPU[0x%07x] Thread (%s) [0x%05x: %s]", type >= spu_type::raw ? type == spu_type::isolated ? "Iso" : "Raw" : "", cpu->lv2_id, *name_cache.get(), cpu->pc, spu_block_hash_short{atomic_storage::load(cpu->block_hash)}); + return fmt::format("%sSPU[0x%07x] Thread (%s) [0x%05x: %s]", type >= spu_type::raw ? type == spu_type::isolated ? "Iso" : "Raw" : "", cpu->lv2_id, *name_cache.get(), cpu->pc, spu_block_hash_short{atomic_storage::load(hash)}); } return fmt::format("%sSPU[0x%07x] Thread (%s) [0x%05x]", type >= spu_type::raw ? type == spu_type::isolated ? "Iso" : "Raw" : "", cpu->lv2_id, *name_cache.get(), cpu->pc); @@ -4347,8 +4349,6 @@ u32 spu_thread::get_mfc_completed() const bool spu_thread::process_mfc_cmd() { - mfc_cmd_id++; - // Stall infinitely if MFC queue is full while (mfc_size >= 16) [[unlikely]] { @@ -4451,7 +4451,7 @@ bool spu_thread::process_mfc_cmd() if ([&]() -> bool { // Validation that it is indeed GETLLAR spinning (large time window is intentional) - if (last_getllar_addr != addr || last_getllar != pc || mfc_cmd_id - 1 != last_getllar_id || perf0.get() - last_gtsc >= 15'000) + if (last_getllar_addr != addr || last_getllar != pc || last_getllar_gpr1 != gpr[1]._u32[3] || perf0.get() - last_gtsc >= 5'000 || (interrupts_enabled && ch_events.load().mask)) { // Seemingly not getllar_busy_waiting_switch = umax; @@ -4514,7 +4514,7 @@ bool spu_thread::process_mfc_cmd() g_ok++; } - if ((g_ok + g_fail) % 20 == 0 && !getllar_busy_waiting_switch) + if ((g_ok + g_fail) % 200 == 0 && !getllar_busy_waiting_switch) spu_log.trace("SPU wait: count=%d. switch=%d, spin=%d, fail=%d, ok=%d, {%d, %d, %d, %d}", total_wait, getllar_busy_waiting_switch, getllar_spin_count, +g_fail, +g_ok, old_stats[0], old_stats[1], old_stats[2], old_stats[3] ); } else @@ -4550,8 +4550,7 @@ bool spu_thread::process_mfc_cmd() } last_getllar = pc; - last_getllar_id = mfc_cmd_id; - last_gtsc = perf0.get(); + last_getllar_gpr1 = gpr[1]._u32[3]; if (getllar_busy_waiting_switch == 1) { @@ -4583,6 +4582,12 @@ bool spu_thread::process_mfc_cmd() { busy_wait(300); } + + last_gtsc = utils::get_tsc(); + } + else + { + last_gtsc = perf0.get(); } return true; @@ -4625,21 +4630,31 @@ bool spu_thread::process_mfc_cmd() u8& val = getllar_wait_time[pc / 32].front(); val = static_cast(std::min(val + 1, u8{umax})); - last_getllar_id = mfc_cmd_id; - last_gtsc = perf0.get(); + last_gtsc = utils::get_tsc(); return true; } + static atomic_t g_ctr, g_fail; + if (new_time == this_time && res == this_time) { spu_log.trace("RTIME unchanged on address 0x%x", addr); + g_fail++; + // Try to forcefully change timestamp in order to notify threads if (get_resrv_waiters_count(addr) && res.compare_and_swap_test(this_time, this_time + 128)) { vm::reservation_notifier(addr).notify_all(); } } + else + { + g_ctr++; + } + + if ((g_ctr + g_fail) % 200 == 0) + spu_log.trace("SPU 100WAIT: fail=%d, ok=%d", +g_fail, +g_ctr); } } @@ -4656,12 +4671,12 @@ bool spu_thread::process_mfc_cmd() set_events(SPU_EVENT_LR); static_cast(test_stopped()); } + + last_getllar = pc; + last_gtsc = perf0.get(); } - last_getllar_id = mfc_cmd_id; - last_getllar = pc; last_getllar_addr = addr; - last_gtsc = perf0.get(); getllar_spin_count = 0; getllar_busy_waiting_switch = umax; @@ -5234,6 +5249,9 @@ s64 spu_thread::get_ch_value(u32 ch) do_mfc(); } + // Reset GETLLAR metadata + last_getllar_addr = umax; + const s64 out = channel.pop_wait(*this); if (state & cpu_flag::wait) @@ -5590,6 +5608,9 @@ bool spu_thread::set_ch_value(u32 ch, u32 value) case SPU_WrOutIntrMbox: { + // Reset GETLLAR metadata + last_getllar_addr = umax; + if (get_type() >= spu_type::raw) { if (state & cpu_flag::pending) @@ -7003,4 +7024,3 @@ void fmt_class_string::format(std::string& out, u64 arg) DECLARE(spu_thread::g_raw_spu_ctr){}; DECLARE(spu_thread::g_raw_spu_id){}; DECLARE(spu_thread::g_spu_work_count){}; -DECLARE(spu_thread::g_reservation_waiters){}; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 8c6c31fb56..e1163a859d 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -677,7 +677,6 @@ public: // MFC command data spu_mfc_cmd ch_mfc_cmd; - u32 mfc_cmd_id = 0; // MFC command queue spu_mfc_cmd mfc_queue[16]{}; @@ -797,7 +796,7 @@ public: u64 last_succ = 0; u64 last_gtsc = 0; u32 last_getllar = umax; // LS address of last GETLLAR (if matches current GETLLAR we can let the thread rest) - u32 last_getllar_id = umax; + u32 last_getllar_gpr1 = umax; u32 last_getllar_addr = umax; u32 getllar_spin_count = 0; u32 getllar_busy_waiting_switch = umax; // umax means the test needs evaluation, otherwise it's a boolean @@ -896,7 +895,6 @@ public: static atomic_t g_raw_spu_ctr; static atomic_t g_raw_spu_id[5]; static atomic_t g_spu_work_count; - static atomic_t g_reservation_waiters[128]; static u32 find_raw_spu(u32 id) {