From fb24b06a5dc5e429022ce0e6744a1273054672bc Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 29 Oct 2020 21:46:50 +0300 Subject: [PATCH] PPU: add LARX perf counter Also refactor ppu_store_reservation a bit. --- rpcs3/Emu/Cell/PPUThread.cpp | 40 ++++++++++++++++++------------------ rpcs3/Emu/Cell/PPUThread.h | 6 ++++++ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 312078f589..434187ba9a 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -854,6 +854,8 @@ ppu_thread::~ppu_thread() { dct->used -= stack_size; } + + perf_log.notice("Perf stats for STCX reload: successs %u, failure %u", last_succ, last_fail); } ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u32 prio, int detached) @@ -1123,6 +1125,8 @@ static void ppu_trace(u64 addr) template static T ppu_load_acquire_reservation(ppu_thread& ppu, u32 addr) { + perf_meter<"LARX"_u32> perf0; + // Do not allow stores accessed from the same cache line to past reservation load std::atomic_thread_fence(std::memory_order_seq_cst); @@ -1354,7 +1358,6 @@ const auto ppu_stcx_accurate_tx = build_function_asm>(addr & -8); auto& res = vm::reservation_acquire(addr, sizeof(T)); - const u64 old_data = reinterpret_cast&>(ppu.rdata[addr & 0x78]); const u64 rtime = ppu.rtime; + be_t old_data = 0; + std::memcpy(&old_data, &ppu.rdata[addr & 0x78], sizeof(old_data)); + be_t new_data = old_data; + if constexpr (sizeof(T) == sizeof(u32)) { // Rebuild reg_value to be 32-bits of new data and 32-bits of old data - union bf64 - { - u64 all; - bf_t low; - bf_t high; - } bf{old_data}; - - if (addr & 4) - bf.low = static_cast(reg_value); - else - bf.high = static_cast(reg_value); - - reg_value = bf.all; + const be_t reg32 = static_cast(reg_value); + std::memcpy(reinterpret_cast(&new_data) + (addr & 4), ®32, sizeof(u32)); + } + else + { + new_data = reg_value; } // Test if store address is on the same aligned 8-bytes memory as load @@ -1665,7 +1664,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) { if (g_use_rtm) [[likely]] { - switch (u32 count = ppu_stcx_accurate_tx(addr & -8, rtime, ppu.rdata, reg_value)) + switch (u32 count = ppu_stcx_accurate_tx(addr & -8, rtime, ppu.rdata, std::bit_cast(new_data))) { case 0: { @@ -1691,7 +1690,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) { if ((res & -128) == rtime && cmp_rdata(ppu.rdata, vm::_ref(addr & -128))) { - data.release(reg_value); + data.release(new_data); res += 127; return true; } @@ -1736,7 +1735,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) if (cmp_rdata(ppu.rdata, super_data)) { - data.release(reg_value); + data.release(new_data); res += 64; return true; } @@ -1748,7 +1747,7 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) return success; } - if (reg_value == old_data) + if (new_data == old_data) { return res.compare_and_swap_test(rtime, rtime + 128); } @@ -1774,7 +1773,8 @@ static bool ppu_store_reservation(ppu_thread& ppu, u32 addr, u64 reg_value) return false; } - if (data.compare_and_swap_test(old_data, reg_value)) + // Store previous value in old_data on failure + if (data.compare_exchange(old_data, new_data)) { res += 127; return true; diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index a31618734e..fbd9cb1051 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -219,6 +219,12 @@ public: // Thread name stx::atomic_cptr ppu_tname; + u64 last_ftsc = 0; + u64 last_ftime = 0; + u32 last_faddr = 0; + u64 last_fail = 0; + u64 last_succ = 0; + be_t* get_stack_arg(s32 i, u64 align = alignof(u64)); void exec_task(); void fast_call(u32 addr, u32 rtoc);