From 82b1a2bd7add711b6e8d02400fb67704cb83d25d Mon Sep 17 00:00:00 2001 From: Eladash Date: Thu, 18 Aug 2022 12:15:32 +0300 Subject: [PATCH] SPU: add the concept of inaccurate reservations Implement cellSpursRequestIdleSpu --- rpcs3/Emu/Cell/Modules/cellSpurs.cpp | 132 ++++++++++++++++++++++----- rpcs3/Emu/Cell/SPUThread.cpp | 21 +++++ rpcs3/Emu/Cell/SPUThread.h | 1 + rpcs3/Emu/system_config.h | 1 + 4 files changed, 134 insertions(+), 21 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index 17d6d4792e..75bd11dd27 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -326,7 +326,7 @@ s32 cellSpursReadyCountAdd(ppu_thread& ppu, vm::ptr spurs, u32 wid, v //s32 cellSpursUnsetExceptionEventHandler(); s32 _cellSpursWorkloadFlagReceiver(ppu_thread& ppu, vm::ptr spurs, u32 wid, u32 is_set); //s32 _cellSpursWorkloadFlagReceiver2(); -//s32 cellSpursRequestIdleSpu(); +//error_code cellSpursRequestIdleSpu(); // // SPURS taskset functions @@ -1781,7 +1781,7 @@ s32 cellSpursSetMaxContention(vm::ptr spurs, u32 wid, u32 maxContenti /// Set the priority of a workload on each SPU s32 cellSpursSetPriorities(vm::ptr spurs, u32 wid, vm::cptr priorities) { - cellSpurs.warning("cellSpursSetPriorities(spurs=*0x%x, wid=%d, priorities=*0x%x)", spurs, wid, priorities); + cellSpurs.trace("cellSpursSetPriorities(spurs=*0x%x, wid=%d, priorities=*0x%x)", spurs, wid, priorities); if (!spurs) { @@ -1850,9 +1850,9 @@ s32 cellSpursSetPriority(vm::ptr spurs, u32 wid, u32 spuId, u32 prior if (spurs->exception) return CELL_SPURS_CORE_ERROR_STAT; - vm::light_op(spurs->wklInfo(wid).priority[spuId], [&](u8& v){ atomic_storage::release(v, priority); }); - vm::light_op(spurs->sysSrvMsgUpdateWorkload, [&](atomic_t& v){ v.bit_test_set(spuId); }); - vm::light_op(spurs->sysSrvMessage, [&](atomic_t& v){ v.bit_test_set(spuId); }); + vm::light_op(spurs->wklInfo(wid).priority[spuId], [&](u8& v){ atomic_storage::release(v, priority); }); + vm::light_op(spurs->sysSrvMsgUpdateWorkload, [&](atomic_t& v){ v.bit_test_set(spuId); }); + vm::light_op(spurs->sysSrvMessage, [&](atomic_t& v){ v.bit_test_set(spuId); }); return CELL_OK; } @@ -2357,14 +2357,45 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr spurs, vm::ptr return CELL_SPURS_POLICY_MODULE_ERROR_AGAIN; } + auto& spurs_res = vm::reservation_acquire(spurs.addr()); + auto& spurs_res2 = vm::reservation_acquire(spurs.addr() + 0x80); + + if (!spurs_res2.fetch_op([&](u64& r) + { + if (r & vm::rsrv_unique_lock) + { + return false; + } + + r += 1; + return true; + }).second) + { + vm::reservation_shared_lock_internal(spurs_res2); + } + + if (!spurs_res.fetch_op([&](u64& r) + { + if (r & vm::rsrv_unique_lock) + { + return false; + } + + r += 1; + return true; + }).second) + { + vm::reservation_shared_lock_internal(spurs_res); + } + u32 index = wnum & 0xf; if (wnum <= 15) { ensure((spurs->wklCurrentContention[wnum] & 0xf) == 0); ensure((spurs->wklPendingContention[wnum] & 0xf) == 0); - spurs->wklState1[wnum] = SPURS_WKL_STATE_PREPARING; + spurs->wklState1[wnum].release(SPURS_WKL_STATE_PREPARING); spurs->wklStatus1[wnum] = 0; - spurs->wklEvent1[wnum] = 0; + spurs->wklEvent1[wnum].release(0); spurs->wklInfo1[wnum].addr = pm; spurs->wklInfo1[wnum].arg = data; spurs->wklInfo1[wnum].size = size; @@ -2392,15 +2423,15 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr spurs, vm::ptr spurs->wklMinContention[wnum] = minContention > 8 ? 8 : minContention; } - spurs->wklReadyCount1[wnum] = 0; + spurs->wklReadyCount1[wnum].release(0); } else { ensure((spurs->wklCurrentContention[index] & 0xf0) == 0); ensure((spurs->wklPendingContention[index] & 0xf0) == 0); - spurs->wklState2[index] = SPURS_WKL_STATE_PREPARING; + spurs->wklState2[index].release(SPURS_WKL_STATE_PREPARING); spurs->wklStatus2[index] = 0; - spurs->wklEvent2[index] = 0; + spurs->wklEvent2[index].release(0); spurs->wklInfo2[index].addr = pm; spurs->wklInfo2[index].arg = data; spurs->wklInfo2[index].size = size; @@ -2422,21 +2453,31 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr spurs, vm::ptr spurs->wklEvent2[index] |= 2; } - spurs->wklIdleSpuCountOrReadyCount2[wnum] = 0; + spurs->wklIdleSpuCountOrReadyCount2[wnum].release(0); } - vm::atomic_op(spurs->wklMaxContention[index], [&](u8& v) + spurs->wklMaxContention[index].atomic_op([&](u8& v) { v &= (wnum <= 15 ? 0xf0 : 0x0f); v |= (maxContention > 8 ? 8 : maxContention) << (wnum < CELL_SPURS_MAX_WORKLOAD ? 0 : 4); }); - vm::atomic_op((wnum <= 15 ? spurs->wklSignal1 : spurs->wklSignal2), [&](be_t& data) + (wnum <= 15 ? spurs->wklSignal1 : spurs->wklSignal2).atomic_op([&](be_t& data) { data &= ~(0x8000 >> index); }); - spurs->wklFlagReceiver.compare_and_swap(wnum, 0xff); + // Attempt to avoid CAS + if (spurs->wklFlagReceiver == wnum && spurs->wklFlagReceiver.compare_and_swap(wnum, 0xff)) + { + // + } + + spurs_res += 127; + spurs_res2 += 127; + + spurs_res.notify_all(-128); + spurs_res2.notify_all(-128); u32 res_wkl; const auto wkl = &spurs->wklInfo(wnum); @@ -2470,7 +2511,7 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr spurs, vm::ptr }); ensure((res_wkl <= 31)); - vm::light_op(spurs->sysSrvMsgUpdateWorkload, [](atomic_t& v){ v.release(0xff); }); + vm::light_op(spurs->sysSrvMsgUpdateWorkload, [](atomic_t& v){ v.release(0xff); }); vm::light_op(spurs->sysSrvMessage, [](atomic_t& v){ v.release(0xff); }); return CELL_OK; } @@ -2478,7 +2519,7 @@ s32 _spurs::add_workload(ppu_thread& ppu, vm::ptr spurs, vm::ptr /// Add workload s32 cellSpursAddWorkload(ppu_thread& ppu, vm::ptr spurs, vm::ptr wid, vm::cptr pm, u32 size, u64 data, vm::cptr priority, u32 minCnt, u32 maxCnt) { - cellSpurs.warning("cellSpursAddWorkload(spurs=*0x%x, wid=*0x%x, pm=*0x%x, size=0x%x, data=0x%llx, priority=*0x%x, minCnt=0x%x, maxCnt=0x%x)", + cellSpurs.trace("cellSpursAddWorkload(spurs=*0x%x, wid=*0x%x, pm=*0x%x, size=0x%x, data=0x%llx, priority=*0x%x, minCnt=0x%x, maxCnt=0x%x)", spurs, wid, pm, size, data, priority, minCnt, maxCnt); return _spurs::add_workload(ppu, spurs, wid, pm, size, data, *priority, minCnt, maxCnt, vm::null, vm::null, vm::null, vm::null); @@ -2487,7 +2528,7 @@ s32 cellSpursAddWorkload(ppu_thread& ppu, vm::ptr spurs, vm::ptr /// Add workload s32 cellSpursAddWorkloadWithAttribute(ppu_thread& ppu, vm::ptr spurs, vm::ptr wid, vm::cptr attr) { - cellSpurs.warning("cellSpursAddWorkloadWithAttribute(spurs=*0x%x, wid=*0x%x, attr=*0x%x)", spurs, wid, attr); + cellSpurs.trace("cellSpursAddWorkloadWithAttribute(spurs=*0x%x, wid=*0x%x, attr=*0x%x)", spurs, wid, attr); if (!attr) { @@ -2647,7 +2688,7 @@ s32 cellSpursRemoveSystemWorkloadForUtility() /// Remove workload s32 cellSpursRemoveWorkload(ppu_thread& ppu, vm::ptr spurs, u32 wid) { - cellSpurs.warning("cellSpursRemoveWorkload(spurs=*0x%x, wid=%u)", spurs, wid); + cellSpurs.trace("cellSpursRemoveWorkload(spurs=*0x%x, wid=%u)", spurs, wid); if (!spurs) return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER; @@ -3101,9 +3142,42 @@ s32 _cellSpursWorkloadFlagReceiver2() } /// Request assignment of idle SPUs -s32 cellSpursRequestIdleSpu() +s32 cellSpursRequestIdleSpu(vm::ptr spurs, u32 wid, u32 count) { - UNIMPLEMENTED_FUNC(cellSpurs); + cellSpurs.trace("cellSpursRequestIdleSpu(spurs=*0x%x, wid=%d, count=%d)", spurs, wid, count); + + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (!spurs.aligned()) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + // Old API: This function doesn't support 32 workloads + if (spurs->flags1 & SF1_32_WORKLOADS) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + if (wid >= CELL_SPURS_MAX_WORKLOAD || count >= CELL_SPURS_MAX_SPU) + { + return CELL_SPURS_CORE_ERROR_INVAL; + } + + if ((spurs->wklEnabled.load() & (0x80000000u >> wid)) == 0u) + { + return CELL_SPURS_CORE_ERROR_SRCH; + } + + if (spurs->exception) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + vm::light_op(spurs->wklIdleSpuCountOrReadyCount2[wid], FN(x.release(static_cast(count)))); return CELL_OK; } @@ -5221,7 +5295,7 @@ s32 cellSpursSemaphoreGetTasksetAddress() return CELL_OK; } -DECLARE(ppu_module_manager::cellSpurs)("cellSpurs", []() +DECLARE(ppu_module_manager::cellSpurs)("cellSpurs", [](ppu_static_module* _this) { // Core REG_FUNC(cellSpurs, cellSpursInitialize); @@ -5383,4 +5457,20 @@ DECLARE(ppu_module_manager::cellSpurs)("cellSpurs", []() REG_FUNC(cellSpurs, cellSpursTraceStart); REG_FUNC(cellSpurs, cellSpursTraceStop); REG_FUNC(cellSpurs, cellSpursTraceFinalize); + + _this->add_init_func([](ppu_static_module*) + { + const auto val = g_cfg.core.spu_accurate_reservations ? MFF_PERFECT : MFF_FORCED_HLE; + + REINIT_FUNC(cellSpursSetPriorities).flag(val); + REINIT_FUNC(cellSpursAddWorkload).flag(val); + REINIT_FUNC(cellSpursAddWorkloadWithAttribute).flag(val); + REINIT_FUNC(cellSpursShutdownWorkload).flag(val); + REINIT_FUNC(cellSpursReadyCountStore).flag(val); + REINIT_FUNC(cellSpursSetPriority).flag(val); + REINIT_FUNC(cellSpursTraceInitialize).flag(val); + REINIT_FUNC(cellSpursWaitForWorkloadShutdown).flag(val); + REINIT_FUNC(cellSpursRequestIdleSpu).flag(val); + REINIT_FUNC(cellSpursRemoveWorkload).flag(val); + }); }); diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 3aa2fdf12c..0f1b1d347b 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1453,6 +1453,20 @@ void spu_thread::cpu_task() return fmt::format("%sSPU[0x%07x] Thread (%s) [0x%05x]", type >= spu_type::raw ? type == spu_type::isolated ? "Iso" : "Raw" : "", cpu->lv2_id, *name_cache.get(), cpu->pc); }; + if (!spurs_addr) + { + // Evaluate it + if (!group) + { + spurs_addr = -0x80; // Some invalid non-0 address + } + else + { + const u32 arg = static_cast(group->args[index][1]); + spurs_addr = group->name.ends_with("CellSpursKernelGroup"sv) && vm::check_addr(arg) ? arg : 0u - 0x80; + } + } + if (jit) { while (true) @@ -2827,6 +2841,13 @@ bool spu_thread::do_putllc(const spu_mfc_cmd& args) return false; } + if (!g_cfg.core.spu_accurate_reservations && addr - spurs_addr <= 0x80) + { + mov_rdata(vm::_ref(addr), to_write); + res += 64; + return true; + } + if (g_use_rtm) [[likely]] { switch (u64 count = spu_putllc_tx(addr, rtime, rdata, to_write)) diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 190c9c23d8..8e06090dc7 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -814,6 +814,7 @@ public: const std::add_pointer_t ls; // SPU LS pointer const u32 option; // sys_spu_thread_initialize option const u32 lv2_id; // The actual id that is used by syscalls + u32 spurs_addr = 0; spu_thread* next_cpu{}; // LV2 thread queues' node link diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 2eae72926d..9ed396d0c6 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -41,6 +41,7 @@ struct cfg_root : cfg::node cfg::_enum spu_block_size{ this, "SPU Block Size", spu_block_size_type::safe }; cfg::_bool spu_accurate_getllar{ this, "Accurate GETLLAR", false, true }; cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false }; + cfg::_bool spu_accurate_reservations{ this, "Accurate SPU Reservations", true }; cfg::_bool accurate_cache_line_stores{ this, "Accurate Cache Line Stores", false }; cfg::_bool rsx_accurate_res_access{this, "Accurate RSX reservation access", false, true};