From 89f124814089981aeedcf1d1edab987d9ba31c88 Mon Sep 17 00:00:00 2001
From: Nekotekina
Date: Wed, 7 Oct 2020 01:14:35 +0300
Subject: [PATCH] Implement vm::reservation_op

Implement vm::reservation_peek (memory load)
Implement vm::unsafe_ptr_cast helper
Example use in cellSpurs.cpp
Fix dma_lockb value and description
---
 rpcs3/Emu/Cell/Modules/cellSpurs.cpp |  86 ++------
 rpcs3/Emu/Cell/Modules/cellSpurs.h   |  23 ++-
 rpcs3/Emu/Memory/vm.cpp              |  47 +++++
 rpcs3/Emu/Memory/vm_ptr.h            |   7 +
 rpcs3/Emu/Memory/vm_reservation.h    | 296 ++++++++++++++++++++++++++-
 5 files changed, 388 insertions(+), 71 deletions(-)

diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
index 1df6704a0c..f9a34c4a4e 100644
--- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp
@@ -160,32 +160,6 @@ extern u32 ppu_lwarx(ppu_thread&, u32);
 extern bool ppu_stwcx(ppu_thread&, u32, u32);
 extern bool ppu_stdcx(ppu_thread&, u32, u64);
 
-bool do_atomic_128_load(cpu_thread& cpu, u32 addr, void* dst)
-{
-	verify(HERE), (addr % 128) == 0;
-
-	while (!cpu.test_stopped())
-	{
-		const u64 rtime = vm::reservation_acquire(addr, 128);
-
-		if (rtime % 128)
-		{
-			continue;
-		}
-
-		std::memcpy(dst, vm::base(addr), 128);
-
-		if (rtime != vm::reservation_acquire(addr, 128))
-		{
-			continue;
-		}
-
-		return true;
-	}
-
-	return false;
-}
-
 error_code sys_spu_image_close(ppu_thread&, vm::ptr<sys_spu_image> img);
 
 //----------------------------------------------------------------------------
@@ -2516,7 +2490,7 @@ s32 cellSpursShutdownWorkload(ppu_thread& ppu, vm::ptr<CellSpurs> spurs, u32 wid
 
 	if (wid >= (spurs->flags1 & SF1_32_WORKLOADS ? CELL_SPURS_MAX_WORKLOAD2 : CELL_SPURS_MAX_WORKLOAD))
 		return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
-
+
 	if (spurs->exception)
 		return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
 
@@ -4393,7 +4367,7 @@ s32 _spurs::check_job_chain_attribute(u32 sdkVer, vm::cptr<u64> jcEntry, u16 siz
 {
 	if (!jcEntry)
 		return CELL_SPURS_JOB_ERROR_NULL_POINTER;
-
+
 	if (!jcEntry.aligned())
 		return CELL_SPURS_JOB_ERROR_ALIGN;
 
@@ -4592,13 +4566,12 @@ s32 cellSpursGetJobChainInfo(ppu_thread& ppu, vm::ptr<CellSpursJobChain> jobChai
 		return err;
 	}
 
-	CellSpursJobChain data;
-
 	// Read the commands queue atomically
-	if (!do_atomic_128_load(ppu, jobChain.addr(), &data))
+	CellSpursJobChain data;
+	vm::reservation_peek(ppu, vm::unsafe_ptr_cast<const CellSpursJobChain_x00>(jobChain), [&](const CellSpursJobChain_x00& jch)
 	{
-		return 0;
-	}
+		std::memcpy(&data, &jch, sizeof(jch));
+	});
 
 	info->linkRegister[0] = +data.linkRegister[0];
 	info->linkRegister[1] = +data.linkRegister[1];
@@ -4896,48 +4869,25 @@ s32 cellSpursAddUrgentCommand(ppu_thread& ppu, vm::ptr<CellSpursJobChain> jobCha
 	if (jobChain->workloadId >= CELL_SPURS_MAX_WORKLOAD2)
 		return CELL_SPURS_JOB_ERROR_INVAL;
 
-	for (u32 i = 0;;)
+	s32 result = CELL_OK;
+
+	vm::reservation_op(vm::unsafe_ptr_cast<CellSpursJobChain_x00>(jobChain), [&](CellSpursJobChain_x00& jch)
 	{
-		if (i >= std::size(jobChain->urgentCmds))
+		for (auto& cmd : jch.urgentCmds)
 		{
-			// Exausted all slots
-			return CELL_SPURS_JOB_ERROR_BUSY;
-		}
-
-		u64 currCmd = ppu_ldarx(ppu, jobChain.ptr(&CellSpursJobChain::urgentCmds, i).addr());
-		std::atomic_thread_fence(std::memory_order_acq_rel);
-
-		bool found = false;
-		bool reset = false;
-
-		if (!currCmd)
-		{
-			if (i != 0 && !jobChain->urgentCmds[i - 1])
+			if (!cmd)
 			{
-				// Restart search, someone emptied out the previous one
-				reset = true;
-			}
-			else
-			{
-				found = true;
-				currCmd = newCmd;
+				cmd = newCmd;
+				return true;
 			}
 		}
 
-		if (reset || !ppu_stdcx(ppu, jobChain.ptr(&CellSpursJobChain::urgentCmds, i).addr(), currCmd))
-		{
-			// Someone modified the job chain or the previous slot is empty, restart search
-			i = 0;
-			continue;
-		}
+		// Considered unlikely so unoptimized
+		result = CELL_SPURS_JOB_ERROR_BUSY;
+		return false;
+	});
 
-		if (found)
-			break;
-
-		i++;
-	}
-
-	return CELL_OK;
+	return result;
 }
 
 s32 cellSpursAddUrgentCall(ppu_thread& ppu, vm::ptr<CellSpursJobChain> jobChain, vm::ptr<u64> commandList)
diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.h b/rpcs3/Emu/Cell/Modules/cellSpurs.h
index 6485eac0d2..4adfff87ae 100644
--- a/rpcs3/Emu/Cell/Modules/cellSpurs.h
+++ b/rpcs3/Emu/Cell/Modules/cellSpurs.h
@@ -465,6 +465,25 @@ struct alignas(128) CellSpursJobChain
 	u8 unk5[0x100 - 0xA8];
 };
 
+struct alignas(128) CellSpursJobChain_x00
+{
+	vm::bcptr<u64, u64> pc;              // 0x00
+	vm::bcptr<u64, u64> linkRegister[3]; // 0x08
+	u8 unk0[0x3];                        // 0x20
+	b8 isHalted;                         // 0x23
+	b8 autoReadyCount;                   // 0x24
+	u8 unk1[0x7];                        // 0x25
+	u8 val2C;                            // 0x2C
+	u8 val2D;                            // 0x2D
+	u8 val2E;                            // 0x2E
+	u8 val2F;                            // 0x2F
+	be_t<u64> urgentCmds[4];             // 0x30
+	u8 unk2[0x22];                       // 0x50
+	be_t<u16> maxGrabbedJob;             // 0x72
+	be_t<u32> workloadId;                // 0x74
+	vm::bptr<CellSpurs, u64> spurs;      // 0x78
+};
+
 struct CellSpursJobChainInfo
 {
 	be_t<u64> urgentCommandSlot[4]; // 0x00
@@ -494,7 +513,7 @@ struct alignas(8) CellSpursJobChainAttribute
 	be_t<u16> maxGrabbedJob; // 0x0E
 	u8 priorities[8];        // 0x10
 	be_t<u32> maxContention; // 0x18
-	b8 autoSpuCount; // 0x1C
+	b8 autoSpuCount;         // 0x1C
 	u8 padding[3];           // 0x1D
 	be_t<u32> tag1;          // 0x20
 	be_t<u32> tag2;          // 0x24
@@ -1031,7 +1050,7 @@ struct alignas(16) CellSpursTaskBinInfo
 
 struct alignas(128) CellSpursBarrier
 {
-	be_t<u32> zero; // 0x00
+	be_t<u32> zero;     // 0x00
 	be_t<u32> remained; // 0x04
 	u8 unk0[0x34 - 0x8];
 	vm::bptr<CellSpursTaskset> taskset; // 0x34
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index d8ba1ef808..0251f28dc8 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -13,6 +13,7 @@
 #include "Emu/CPU/CPUThread.h"
 #include "Emu/Cell/lv2/sys_memory.h"
 #include "Emu/RSX/GSRender.h"
+#include "Emu/Cell/SPURecompiler.h"
 #include
 #include
 #include
@@ -470,6 +471,52 @@
 		}
 	}
 
+	void reservation_op_internal(u32 addr, std::function<bool()> func)
+	{
+		const auto _cpu = get_current_cpu_thread();
+
+		// Acknowledge contender if necessary (TODO: check)
+		_cpu->state += cpu_flag::wait;
+
+		{
+			cpu_thread::suspend_all cpu_lock(_cpu);
+
+			// Wait to acquire PUTLLUC lock
+			while (vm::reservation_acquire(addr, 128).bts(std::countr_zero<u64>(vm::putlluc_lockb)))
+			{
+				busy_wait(100);
+			}
+
+			if (func())
+			{
+				// Success, release PUTLLUC and PUTLLC locks if necessary
+				vm::reservation_acquire(addr, 128) += 63;
+			}
+			else
+			{
+				// Fake update (TODO)
+				vm::reservation_acquire(addr, 128) += 63;
+			}
+		}
+
+		vm::reservation_notifier(addr, 128).notify_all();
+	}
+
+	void reservation_escape_internal()
+	{
+		const auto _cpu = get_current_cpu_thread();
+
+		if (_cpu && _cpu->id_type() == 1)
+		{
+			thread_ctrl::emergency_exit("vm::reservation_escape");
+		}
+
+		if (_cpu && _cpu->id_type() == 2)
+		{
+			spu_runtime::g_escape(static_cast<spu_thread*>(_cpu));
+		}
+	}
+
 	static void _page_map(u32 addr, u8 flags, u32 size, utils::shm* shm)
 	{
 		if (!size || (size | addr) % 4096 || flags & page_allocated)
diff --git a/rpcs3/Emu/Memory/vm_ptr.h b/rpcs3/Emu/Memory/vm_ptr.h
index d9f71532c4..dd77c453a1 100644
--- a/rpcs3/Emu/Memory/vm_ptr.h
+++ b/rpcs3/Emu/Memory/vm_ptr.h
@@ -323,6 +323,13 @@
 	{
 		return vm::cast(other.addr(), HERE);
 	}
+
+	// Perform reinterpret cast
+	template <typename CT, typename T, typename AT, typename = decltype(reinterpret_cast<to_be_t<CT>*>(std::declval<T*>()))>
+	inline _ptr_base<to_be_t<CT>, u32> unsafe_ptr_cast(const _ptr_base<T, AT>& other)
+	{
+		return vm::cast(other.addr(), HERE);
+	}
 }
 
 struct null_t
diff --git a/rpcs3/Emu/Memory/vm_reservation.h b/rpcs3/Emu/Memory/vm_reservation.h
index 6320c91e7d..fd593c28db 100644
--- a/rpcs3/Emu/Memory/vm_reservation.h
+++ b/rpcs3/Emu/Memory/vm_reservation.h
@@ -1,15 +1,19 @@
 #pragma once
 
 #include "vm.h"
+#include "vm_locking.h"
 #include "Utilities/cond.h"
 #include "util/atomic.hpp"
+#include <functional>
+
+extern bool g_use_rtm;
 
 namespace vm
 {
 	enum reservation_lock_bit : u64
 	{
 		stcx_lockb = 1 << 0, // Exclusive conditional reservation lock
-		dma_lockb = 1 << 1, // Inexclusive unconditional reservation lock
+		dma_lockb = 1 << 5, // Exclusive unconditional reservation lock
 		putlluc_lockb = 1 << 6, // Exclusive unconditional reservation lock
 	};
@@ -69,4 +73,294 @@
 		return {*res, rtime};
 	}
+
+	void reservation_op_internal(u32 addr, std::function<bool()> func);
+
+	template <typename T, typename AT, typename F>
+	SAFE_BUFFERS inline auto reservation_op(_ptr_base<T, AT> ptr, F op)
+	{
+		// Atomic operation will be performed on aligned 128 bytes of data, so the data size and alignment must comply
+		static_assert(sizeof(T) <= 128 && alignof(T) == sizeof(T), "vm::reservation_op: unsupported type");
+		static_assert(std::is_trivially_copyable_v<T>, "vm::reservation_op: not triv copyable (optimization)");
+
+		// Use "super" pointer to prevent access violation handling during atomic op
+		const auto sptr = vm::get_super_ptr<T>(static_cast<u32>(ptr.addr()));
+
+		// Use 128-byte aligned addr
+		const u32 addr = static_cast<u32>(ptr.addr()) & -128;
+
+		if (g_use_rtm)
+		{
+			auto& res = vm::reservation_acquire(addr, 128);
+
+			// Stage 1: single optimistic transaction attempt
+			unsigned status = _XBEGIN_STARTED;
+
+#ifndef _MSC_VER
+			__asm__ goto ("xbegin %l[stage2];" ::: "memory" : stage2);
+#else
+			status = _xbegin();
+			if (status == _XBEGIN_STARTED)
+#endif
+			{
+				if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
+				{
+					res += 128;
+					std::invoke(op, *sptr);
+#ifndef _MSC_VER
+					__asm__ volatile ("xend;" ::: "memory");
+#else
+					_xend();
+#endif
+					res.notify_all();
+					return;
+				}
+				else
+				{
+					if (auto result = std::invoke(op, *sptr))
+					{
+						res += 128;
+#ifndef _MSC_VER
+						__asm__ volatile ("xend;" ::: "memory");
+#else
+						_xend();
+#endif
+						res.notify_all();
+						return result;
+					}
+					else
+					{
+#ifndef _MSC_VER
+						__asm__ volatile ("xabort $1;" ::: "memory");
+#else
+						_xabort(1);
+#endif
+						// Unreachable code
+						return std::invoke_result_t<F, T&>();
+					}
+				}
+			}
+
+			stage2:
+#ifndef _MSC_VER
+			__asm__ volatile ("movl %%eax, %0;" : "=r" (status) :: "memory");
+#endif
+			if constexpr (!std::is_void_v<std::invoke_result_t<F, T&>>)
+			{
+				if (_XABORT_CODE(status))
+				{
+					// Unfortunately, actual function result is not recoverable in this case
+					return std::invoke_result_t<F, T&>();
+				}
+			}
+
+			// Touch memory if transaction failed without RETRY flag on the first attempt (TODO)
+			if (!(status & _XABORT_RETRY))
+			{
+				reinterpret_cast<atomic_t<u8>*>(sptr)->fetch_add(0);
+			}
+
+			// Stage 2: try to lock reservation first
+			res += stcx_lockb;
+
+			// Start lightened transaction (TODO: tweaking)
+			while (true)
+			{
+#ifndef _MSC_VER
+				__asm__ goto ("xbegin %l[retry];" ::: "memory" : retry);
+#else
+				status = _xbegin();
+
+				if (status != _XBEGIN_STARTED) [[unlikely]]
+				{
+					goto retry;
+				}
+#endif
+				if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
+				{
+					std::invoke(op, *sptr);
+#ifndef _MSC_VER
+					__asm__ volatile ("xend;" ::: "memory");
+#else
+					_xend();
+#endif
+					res += 127;
+					res.notify_all();
+					return;
+				}
+				else
+				{
+					if (auto result = std::invoke(op, *sptr))
+					{
+#ifndef _MSC_VER
+						__asm__ volatile ("xend;" ::: "memory");
+#else
+						_xend();
+#endif
+						res += 127;
+						res.notify_all();
+						return result;
+					}
+					else
+					{
+#ifndef _MSC_VER
+						__asm__ volatile ("xabort $1;" ::: "memory");
+#else
+						_xabort(1);
+#endif
+						return std::invoke_result_t<F, T&>();
+					}
+				}
+
+				retry:
+#ifndef _MSC_VER
+				__asm__ volatile ("movl %%eax, %0;" : "=r" (status) :: "memory");
+#endif
+				if (!(status & _XABORT_RETRY)) [[unlikely]]
+				{
+					if constexpr (!std::is_void_v<std::invoke_result_t<F, T&>>)
+					{
+						if (_XABORT_CODE(status))
+						{
+							res -= 1;
+							return std::invoke_result_t<F, T&>();
+						}
+					}
+
+					break;
+				}
+			}
+
+			// Stage 3: all failed, heavyweight fallback (see comments at the bottom)
+			if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
+			{
+				return vm::reservation_op_internal(addr, [&]
+				{
+					std::invoke(op, *sptr);
+					return true;
+				});
+			}
+			else
+			{
+				auto result = std::invoke_result_t<F, T&>();
+
+				vm::reservation_op_internal(addr, [&]
+				{
+					T buf = *sptr;
+
+					if ((result = std::invoke(op, buf)))
+					{
+						*sptr = buf;
+						return true;
+					}
+					else
+					{
+						return false;
+					}
+				});
+
+				return result;
+			}
+		}
+
+		// Perform under heavyweight lock
+		auto& res = vm::reservation_acquire(addr, 128);
+
+		res += stcx_lockb;
+
+		// Write directly if the op cannot fail
+		if constexpr (std::is_void_v<std::invoke_result_t<F, T&>>)
+		{
+			{
+				vm::writer_lock lock(addr);
+				std::invoke(op, *sptr);
+				res += 127;
+			}
+
+			res.notify_all();
+			return;
+		}
+		else
+		{
+			// Make an operational copy of data (TODO: volatile storage?)
+			auto result = std::invoke_result_t<F, T&>();
+
+			{
+				vm::writer_lock lock(addr);
+				T buf = *sptr;
+
+				if ((result = std::invoke(op, buf)))
+				{
+					// If operation succeeds, write the data back
+					*sptr = buf;
+					res += 127;
+				}
+				else
+				{
+					// Operation failed, no memory has been modified
+					res -= 1;
+					return std::invoke_result_t<F, T&>();
+				}
+			}
+
+			res.notify_all();
+			return result;
+		}
+	}
+
+	// For internal usage
+	void reservation_escape_internal();
+
+	// Read memory value in pseudo-atomic manner
+	template <typename CPU, typename T, typename AT, typename F>
+	SAFE_BUFFERS inline auto reservation_peek(CPU&& cpu, _ptr_base<T, AT> ptr, F op)
+	{
+		// Atomic operation will be performed on aligned 128 bytes of data, so the data size and alignment must comply
+		static_assert(sizeof(T) <= 128 && alignof(T) == sizeof(T), "vm::reservation_peek: unsupported type");
+
+		// Use "super" pointer to prevent access violation handling during atomic op
+		const auto sptr = vm::get_super_ptr<const T>(static_cast<u32>(ptr.addr()));
+
+		// Use 128-byte aligned addr
+		const u32 addr = static_cast<u32>(ptr.addr()) & -128;
+
+		while (true)
+		{
+			if constexpr (std::is_class_v<std::remove_cvref_t<CPU>>)
+			{
+				if (cpu.test_stopped())
+				{
+					reservation_escape_internal();
+				}
+			}
+
+			const u64 rtime = vm::reservation_acquire(addr, 128);
+
+			if (rtime & 127)
+			{
+				continue;
+			}
+
+			// Observe data non-atomically and make sure no reservation updates were made
+			if constexpr (std::is_void_v<std::invoke_result_t<F, const T&>>)
+			{
+				std::invoke(op, *sptr);
+
+				if (rtime == vm::reservation_acquire(addr, 128))
+				{
+					return;
+				}
+			}
+			else
+			{
+				auto res = std::invoke(op, *sptr);
+
+				if (rtime == vm::reservation_acquire(addr, 128))
+				{
+					return res;
+				}
+			}
+		}
+	}
 
 } // namespace vm
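
Usage sketch (reviewer note, not part of the patch): a minimal example of how the two new helpers are meant to be called, under the constraints the header asserts (the guest type must be trivially copyable, at most 128 bytes, and aligned to its own size). Counter128, try_increment and read_value below are hypothetical names invented purely for illustration; vm::reservation_op, vm::reservation_peek, vm::ptr, be_t and ppu_thread are the real RPCS3 entities this patch relies on.

// Hypothetical 128-byte guest structure used only for this sketch.
struct alignas(128) Counter128
{
	be_t<u64> value;      // 0x00
	be_t<u64> fail_count; // 0x08
	u8 pad[0x70];         // 0x10
};

// Conditional atomic update of a whole 128-byte block: returning false from
// the op aborts without modifying guest memory (transaction rollback on the
// TSX paths, discarded copy on the locked fallback paths).
bool try_increment(vm::ptr<Counter128> ctr)
{
	return vm::reservation_op(ctr, [](Counter128& data)
	{
		if (data.value >= 1000)
		{
			return false; // Reject the update, nothing is written back
		}

		data.value = data.value + 1;
		return true;
	});
}

// Pseudo-atomic read: the op may observe torn data, but its result is only
// accepted once the reservation stamp is verified unchanged across the read.
u64 read_value(ppu_thread& ppu, vm::ptr<Counter128> ctr)
{
	return vm::reservation_peek(ppu, ctr, [](const Counter128& data)
	{
		return static_cast<u64>(data.value);
	});
}

As in the cellSpursAddUrgentCommand change above, the op's return value doubles as the success flag, so callers never have to re-inspect guest memory after a failed attempt.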